Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(387)

Side by Side Diff: components/autofill/browser/address_field.cc

Issue 17392006: In components/autofill, move browser/ to core/browser/ (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase to fix conflicts Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/browser/address_field.h"
6
7 #include <stddef.h>
8
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/autofill/browser/autofill_field.h"
15 #include "components/autofill/browser/autofill_regex_constants.h"
16 #include "components/autofill/browser/autofill_scanner.h"
17 #include "components/autofill/browser/field_types.h"
18 #include "ui/base/l10n/l10n_util.h"
19
20 namespace autofill {
21
22 FormField* AddressField::Parse(AutofillScanner* scanner) {
23 if (scanner->IsEnd())
24 return NULL;
25
26 scoped_ptr<AddressField> address_field(new AddressField);
27 const AutofillField* const initial_field = scanner->Cursor();
28 size_t saved_cursor = scanner->SaveCursor();
29
30 base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe);
31 base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe);
32
33 // Allow address fields to appear in any order.
34 size_t begin_trailing_non_labeled_fields = 0;
35 bool has_trailing_non_labeled_fields = false;
36 while (!scanner->IsEnd()) {
37 const size_t cursor = scanner->SaveCursor();
38 if (ParseAddressLines(scanner, address_field.get()) ||
39 ParseCity(scanner, address_field.get()) ||
40 ParseState(scanner, address_field.get()) ||
41 ParseZipCode(scanner, address_field.get()) ||
42 ParseCountry(scanner, address_field.get()) ||
43 ParseCompany(scanner, address_field.get())) {
44 has_trailing_non_labeled_fields = false;
45 continue;
46 } else if (ParseField(scanner, attention_ignored, NULL) ||
47 ParseField(scanner, region_ignored, NULL)) {
48 // We ignore the following:
49 // * Attention.
50 // * Province/Region/Other.
51 continue;
52 } else if (scanner->Cursor() != initial_field &&
53 ParseEmptyLabel(scanner, NULL)) {
54 // Ignore non-labeled fields within an address; the page
55 // MapQuest Driving Directions North America.html contains such a field.
56 // We only ignore such fields after we've parsed at least one other field;
57 // otherwise we'd effectively parse address fields before other field
58 // types after any non-labeled fields, and we want email address fields to
59 // have precedence since some pages contain fields labeled
60 // "Email address".
61 if (!has_trailing_non_labeled_fields) {
62 has_trailing_non_labeled_fields = true;
63 begin_trailing_non_labeled_fields = cursor;
64 }
65
66 continue;
67 } else {
68 // No field found.
69 break;
70 }
71 }
72
73 // If we have identified any address fields in this field then it should be
74 // added to the list of fields.
75 if (address_field->company_ != NULL ||
76 address_field->address1_ != NULL || address_field->address2_ != NULL ||
77 address_field->city_ != NULL || address_field->state_ != NULL ||
78 address_field->zip_ != NULL || address_field->zip4_ ||
79 address_field->country_ != NULL) {
80 // Don't slurp non-labeled fields at the end into the address.
81 if (has_trailing_non_labeled_fields)
82 scanner->RewindTo(begin_trailing_non_labeled_fields);
83
84 address_field->type_ = address_field->FindType();
85 return address_field.release();
86 }
87
88 scanner->RewindTo(saved_cursor);
89 return NULL;
90 }
91
92 AddressField::AddressType AddressField::FindType() const {
93 // First look at the field name, which itself will sometimes contain
94 // "bill" or "ship".
95 if (company_) {
96 base::string16 name = StringToLowerASCII(company_->name);
97 return AddressTypeFromText(name);
98 }
99 if (address1_) {
100 base::string16 name = StringToLowerASCII(address1_->name);
101 return AddressTypeFromText(name);
102 }
103 if (address2_) {
104 base::string16 name = StringToLowerASCII(address2_->name);
105 return AddressTypeFromText(name);
106 }
107 if (city_) {
108 base::string16 name = StringToLowerASCII(city_->name);
109 return AddressTypeFromText(name);
110 }
111 if (zip_) {
112 base::string16 name = StringToLowerASCII(zip_->name);
113 return AddressTypeFromText(name);
114 }
115 if (state_) {
116 base::string16 name = StringToLowerASCII(state_->name);
117 return AddressTypeFromText(name);
118 }
119 if (country_) {
120 base::string16 name = StringToLowerASCII(country_->name);
121 return AddressTypeFromText(name);
122 }
123
124 return kGenericAddress;
125 }
126
127 AddressField::AddressField()
128 : company_(NULL),
129 address1_(NULL),
130 address2_(NULL),
131 city_(NULL),
132 state_(NULL),
133 zip_(NULL),
134 zip4_(NULL),
135 country_(NULL),
136 type_(kGenericAddress) {
137 }
138
139 bool AddressField::ClassifyField(FieldTypeMap* map) const {
140 AutofillFieldType address_company;
141 AutofillFieldType address_line1;
142 AutofillFieldType address_line2;
143 AutofillFieldType address_city;
144 AutofillFieldType address_state;
145 AutofillFieldType address_zip;
146 AutofillFieldType address_country;
147
148 switch (type_) {
149 case kShippingAddress:
150 // Fall through. Autofill does not support shipping addresses.
151 case kGenericAddress:
152 address_company = COMPANY_NAME;
153 address_line1 = ADDRESS_HOME_LINE1;
154 address_line2 = ADDRESS_HOME_LINE2;
155 address_city = ADDRESS_HOME_CITY;
156 address_state = ADDRESS_HOME_STATE;
157 address_zip = ADDRESS_HOME_ZIP;
158 address_country = ADDRESS_HOME_COUNTRY;
159 break;
160
161 case kBillingAddress:
162 address_company = COMPANY_NAME;
163 address_line1 = ADDRESS_BILLING_LINE1;
164 address_line2 = ADDRESS_BILLING_LINE2;
165 address_city = ADDRESS_BILLING_CITY;
166 address_state = ADDRESS_BILLING_STATE;
167 address_zip = ADDRESS_BILLING_ZIP;
168 address_country = ADDRESS_BILLING_COUNTRY;
169 break;
170
171 default:
172 NOTREACHED();
173 return false;
174 }
175
176 bool ok = AddClassification(company_, address_company, map);
177 ok = ok && AddClassification(address1_, address_line1, map);
178 ok = ok && AddClassification(address2_, address_line2, map);
179 ok = ok && AddClassification(city_, address_city, map);
180 ok = ok && AddClassification(state_, address_state, map);
181 ok = ok && AddClassification(zip_, address_zip, map);
182 ok = ok && AddClassification(country_, address_country, map);
183 return ok;
184 }
185
186 // static
187 bool AddressField::ParseCompany(AutofillScanner* scanner,
188 AddressField* address_field) {
189 if (address_field->company_ && !address_field->company_->IsEmpty())
190 return false;
191
192 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe),
193 &address_field->company_);
194 }
195
196 // static
197 bool AddressField::ParseAddressLines(AutofillScanner* scanner,
198 AddressField* address_field) {
199 // We only match the string "address" in page text, not in element names,
200 // because sometimes every element in a group of address fields will have
201 // a name containing the string "address"; for example, on the page
202 // Kohl's - Register Billing Address.html the text element labeled "city"
203 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
204 // such as "address1", which appear as element names on various pages (eg
205 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
206 // EBay Registration Enter Information.html).
207 if (address_field->address1_)
208 return false;
209
210 base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re);
211 base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe);
212
213 if (!ParseField(scanner, pattern, &address_field->address1_) &&
214 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
215 &address_field->address1_)) {
216 return false;
217 }
218
219 // Optionally parse more address lines, which may have empty labels.
220 // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
221 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
222 pattern = UTF8ToUTF16(autofill::kAddressLine2Re);
223 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe);
224 if (!ParseEmptyLabel(scanner, &address_field->address2_) &&
225 !ParseField(scanner, pattern, &address_field->address2_)) {
226 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
227 &address_field->address2_);
228 }
229
230 // Try for surplus lines, which we will promptly discard.
231 if (address_field->address2_ != NULL) {
232 pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe);
233 while (ParseField(scanner, pattern, NULL)) {
234 // Consumed a surplus line, try for another.
235 }
236 }
237
238 return true;
239 }
240
241 // static
242 bool AddressField::ParseCountry(AutofillScanner* scanner,
243 AddressField* address_field) {
244 // Parse a country. The occasional page (e.g.
245 // Travelocity_New Member Information1.html) calls this a "location".
246 if (address_field->country_ && !address_field->country_->IsEmpty())
247 return false;
248
249 return ParseFieldSpecifics(scanner,
250 UTF8ToUTF16(autofill::kCountryRe),
251 MATCH_DEFAULT | MATCH_SELECT,
252 &address_field->country_);
253 }
254
255 // static
256 bool AddressField::ParseZipCode(AutofillScanner* scanner,
257 AddressField* address_field) {
258 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
259 // is called a "post code".
260 //
261 // HACK: Just for the MapQuest driving directions page we match the
262 // exact name "1z", which MapQuest uses to label its zip code field.
263 // Hopefully before long we'll be smart enough to find the zip code
264 // on that page automatically.
265 if (address_field->zip_)
266 return false;
267
268 base::string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe);
269 if (!ParseField(scanner, pattern, &address_field->zip_))
270 return false;
271
272 address_field->type_ = kGenericAddress;
273 // Look for a zip+4, whose field name will also often contain
274 // the substring "zip".
275 ParseField(scanner,
276 UTF8ToUTF16(autofill::kZip4Re),
277 &address_field->zip4_);
278
279 return true;
280 }
281
282 // static
283 bool AddressField::ParseCity(AutofillScanner* scanner,
284 AddressField* address_field) {
285 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
286 // the term "town".
287 if (address_field->city_)
288 return false;
289
290 // Select fields are allowed here. This occurs on top-100 site rediff.com.
291 return ParseFieldSpecifics(scanner,
292 UTF8ToUTF16(autofill::kCityRe),
293 MATCH_DEFAULT | MATCH_SELECT,
294 &address_field->city_);
295 }
296
297 // static
298 bool AddressField::ParseState(AutofillScanner* scanner,
299 AddressField* address_field) {
300 if (address_field->state_)
301 return false;
302
303 return ParseFieldSpecifics(scanner,
304 UTF8ToUTF16(autofill::kStateRe),
305 MATCH_DEFAULT | MATCH_SELECT,
306 &address_field->state_);
307 }
308
309 AddressField::AddressType AddressField::AddressTypeFromText(
310 const base::string16 &text) {
311 size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe));
312 size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe));
313 if (same_as != base::string16::npos || use_shipping != base::string16::npos)
314 // This text could be a checkbox label such as "same as my billing
315 // address" or "use my shipping address".
316 // ++ It would help if we generally skipped all text that appears
317 // after a check box.
318 return kGenericAddress;
319
320 // Not all pages say "billing address" and "shipping address" explicitly;
321 // for example, Craft Catalog1.html has "Bill-to Address" and
322 // "Ship-to Address".
323 size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe));
324 size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe));
325
326 if (bill == base::string16::npos && ship == base::string16::npos)
327 return kGenericAddress;
328
329 if (bill != base::string16::npos && ship == base::string16::npos)
330 return kBillingAddress;
331
332 if (bill == base::string16::npos && ship != base::string16::npos)
333 return kShippingAddress;
334
335 if (bill > ship)
336 return kBillingAddress;
337
338 return kShippingAddress;
339 }
340
341 } // namespace autofill
OLDNEW
« no previous file with comments | « components/autofill/browser/address_field.h ('k') | components/autofill/browser/address_field_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698