OLD | NEW |
| (Empty) |
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 | |
5 #include "components/autofill/browser/address_field.h" | |
6 | |
7 #include <stddef.h> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/memory/scoped_ptr.h" | |
11 #include "base/strings/string16.h" | |
12 #include "base/strings/string_util.h" | |
13 #include "base/strings/utf_string_conversions.h" | |
14 #include "components/autofill/browser/autofill_field.h" | |
15 #include "components/autofill/browser/autofill_regex_constants.h" | |
16 #include "components/autofill/browser/autofill_scanner.h" | |
17 #include "components/autofill/browser/field_types.h" | |
18 #include "ui/base/l10n/l10n_util.h" | |
19 | |
20 namespace autofill { | |
21 | |
22 FormField* AddressField::Parse(AutofillScanner* scanner) { | |
23 if (scanner->IsEnd()) | |
24 return NULL; | |
25 | |
26 scoped_ptr<AddressField> address_field(new AddressField); | |
27 const AutofillField* const initial_field = scanner->Cursor(); | |
28 size_t saved_cursor = scanner->SaveCursor(); | |
29 | |
30 base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); | |
31 base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); | |
32 | |
33 // Allow address fields to appear in any order. | |
34 size_t begin_trailing_non_labeled_fields = 0; | |
35 bool has_trailing_non_labeled_fields = false; | |
36 while (!scanner->IsEnd()) { | |
37 const size_t cursor = scanner->SaveCursor(); | |
38 if (ParseAddressLines(scanner, address_field.get()) || | |
39 ParseCity(scanner, address_field.get()) || | |
40 ParseState(scanner, address_field.get()) || | |
41 ParseZipCode(scanner, address_field.get()) || | |
42 ParseCountry(scanner, address_field.get()) || | |
43 ParseCompany(scanner, address_field.get())) { | |
44 has_trailing_non_labeled_fields = false; | |
45 continue; | |
46 } else if (ParseField(scanner, attention_ignored, NULL) || | |
47 ParseField(scanner, region_ignored, NULL)) { | |
48 // We ignore the following: | |
49 // * Attention. | |
50 // * Province/Region/Other. | |
51 continue; | |
52 } else if (scanner->Cursor() != initial_field && | |
53 ParseEmptyLabel(scanner, NULL)) { | |
54 // Ignore non-labeled fields within an address; the page | |
55 // MapQuest Driving Directions North America.html contains such a field. | |
56 // We only ignore such fields after we've parsed at least one other field; | |
57 // otherwise we'd effectively parse address fields before other field | |
58 // types after any non-labeled fields, and we want email address fields to | |
59 // have precedence since some pages contain fields labeled | |
60 // "Email address". | |
61 if (!has_trailing_non_labeled_fields) { | |
62 has_trailing_non_labeled_fields = true; | |
63 begin_trailing_non_labeled_fields = cursor; | |
64 } | |
65 | |
66 continue; | |
67 } else { | |
68 // No field found. | |
69 break; | |
70 } | |
71 } | |
72 | |
73 // If we have identified any address fields in this field then it should be | |
74 // added to the list of fields. | |
75 if (address_field->company_ != NULL || | |
76 address_field->address1_ != NULL || address_field->address2_ != NULL || | |
77 address_field->city_ != NULL || address_field->state_ != NULL || | |
78 address_field->zip_ != NULL || address_field->zip4_ || | |
79 address_field->country_ != NULL) { | |
80 // Don't slurp non-labeled fields at the end into the address. | |
81 if (has_trailing_non_labeled_fields) | |
82 scanner->RewindTo(begin_trailing_non_labeled_fields); | |
83 | |
84 address_field->type_ = address_field->FindType(); | |
85 return address_field.release(); | |
86 } | |
87 | |
88 scanner->RewindTo(saved_cursor); | |
89 return NULL; | |
90 } | |
91 | |
92 AddressField::AddressType AddressField::FindType() const { | |
93 // First look at the field name, which itself will sometimes contain | |
94 // "bill" or "ship". | |
95 if (company_) { | |
96 base::string16 name = StringToLowerASCII(company_->name); | |
97 return AddressTypeFromText(name); | |
98 } | |
99 if (address1_) { | |
100 base::string16 name = StringToLowerASCII(address1_->name); | |
101 return AddressTypeFromText(name); | |
102 } | |
103 if (address2_) { | |
104 base::string16 name = StringToLowerASCII(address2_->name); | |
105 return AddressTypeFromText(name); | |
106 } | |
107 if (city_) { | |
108 base::string16 name = StringToLowerASCII(city_->name); | |
109 return AddressTypeFromText(name); | |
110 } | |
111 if (zip_) { | |
112 base::string16 name = StringToLowerASCII(zip_->name); | |
113 return AddressTypeFromText(name); | |
114 } | |
115 if (state_) { | |
116 base::string16 name = StringToLowerASCII(state_->name); | |
117 return AddressTypeFromText(name); | |
118 } | |
119 if (country_) { | |
120 base::string16 name = StringToLowerASCII(country_->name); | |
121 return AddressTypeFromText(name); | |
122 } | |
123 | |
124 return kGenericAddress; | |
125 } | |
126 | |
127 AddressField::AddressField() | |
128 : company_(NULL), | |
129 address1_(NULL), | |
130 address2_(NULL), | |
131 city_(NULL), | |
132 state_(NULL), | |
133 zip_(NULL), | |
134 zip4_(NULL), | |
135 country_(NULL), | |
136 type_(kGenericAddress) { | |
137 } | |
138 | |
139 bool AddressField::ClassifyField(FieldTypeMap* map) const { | |
140 AutofillFieldType address_company; | |
141 AutofillFieldType address_line1; | |
142 AutofillFieldType address_line2; | |
143 AutofillFieldType address_city; | |
144 AutofillFieldType address_state; | |
145 AutofillFieldType address_zip; | |
146 AutofillFieldType address_country; | |
147 | |
148 switch (type_) { | |
149 case kShippingAddress: | |
150 // Fall through. Autofill does not support shipping addresses. | |
151 case kGenericAddress: | |
152 address_company = COMPANY_NAME; | |
153 address_line1 = ADDRESS_HOME_LINE1; | |
154 address_line2 = ADDRESS_HOME_LINE2; | |
155 address_city = ADDRESS_HOME_CITY; | |
156 address_state = ADDRESS_HOME_STATE; | |
157 address_zip = ADDRESS_HOME_ZIP; | |
158 address_country = ADDRESS_HOME_COUNTRY; | |
159 break; | |
160 | |
161 case kBillingAddress: | |
162 address_company = COMPANY_NAME; | |
163 address_line1 = ADDRESS_BILLING_LINE1; | |
164 address_line2 = ADDRESS_BILLING_LINE2; | |
165 address_city = ADDRESS_BILLING_CITY; | |
166 address_state = ADDRESS_BILLING_STATE; | |
167 address_zip = ADDRESS_BILLING_ZIP; | |
168 address_country = ADDRESS_BILLING_COUNTRY; | |
169 break; | |
170 | |
171 default: | |
172 NOTREACHED(); | |
173 return false; | |
174 } | |
175 | |
176 bool ok = AddClassification(company_, address_company, map); | |
177 ok = ok && AddClassification(address1_, address_line1, map); | |
178 ok = ok && AddClassification(address2_, address_line2, map); | |
179 ok = ok && AddClassification(city_, address_city, map); | |
180 ok = ok && AddClassification(state_, address_state, map); | |
181 ok = ok && AddClassification(zip_, address_zip, map); | |
182 ok = ok && AddClassification(country_, address_country, map); | |
183 return ok; | |
184 } | |
185 | |
186 // static | |
187 bool AddressField::ParseCompany(AutofillScanner* scanner, | |
188 AddressField* address_field) { | |
189 if (address_field->company_ && !address_field->company_->IsEmpty()) | |
190 return false; | |
191 | |
192 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), | |
193 &address_field->company_); | |
194 } | |
195 | |
196 // static | |
197 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | |
198 AddressField* address_field) { | |
199 // We only match the string "address" in page text, not in element names, | |
200 // because sometimes every element in a group of address fields will have | |
201 // a name containing the string "address"; for example, on the page | |
202 // Kohl's - Register Billing Address.html the text element labeled "city" | |
203 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | |
204 // such as "address1", which appear as element names on various pages (eg | |
205 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | |
206 // EBay Registration Enter Information.html). | |
207 if (address_field->address1_) | |
208 return false; | |
209 | |
210 base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); | |
211 base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); | |
212 | |
213 if (!ParseField(scanner, pattern, &address_field->address1_) && | |
214 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
215 &address_field->address1_)) { | |
216 return false; | |
217 } | |
218 | |
219 // Optionally parse more address lines, which may have empty labels. | |
220 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | |
221 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | |
222 pattern = UTF8ToUTF16(autofill::kAddressLine2Re); | |
223 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); | |
224 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
225 !ParseField(scanner, pattern, &address_field->address2_)) { | |
226 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
227 &address_field->address2_); | |
228 } | |
229 | |
230 // Try for surplus lines, which we will promptly discard. | |
231 if (address_field->address2_ != NULL) { | |
232 pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe); | |
233 while (ParseField(scanner, pattern, NULL)) { | |
234 // Consumed a surplus line, try for another. | |
235 } | |
236 } | |
237 | |
238 return true; | |
239 } | |
240 | |
241 // static | |
242 bool AddressField::ParseCountry(AutofillScanner* scanner, | |
243 AddressField* address_field) { | |
244 // Parse a country. The occasional page (e.g. | |
245 // Travelocity_New Member Information1.html) calls this a "location". | |
246 if (address_field->country_ && !address_field->country_->IsEmpty()) | |
247 return false; | |
248 | |
249 return ParseFieldSpecifics(scanner, | |
250 UTF8ToUTF16(autofill::kCountryRe), | |
251 MATCH_DEFAULT | MATCH_SELECT, | |
252 &address_field->country_); | |
253 } | |
254 | |
255 // static | |
256 bool AddressField::ParseZipCode(AutofillScanner* scanner, | |
257 AddressField* address_field) { | |
258 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
259 // is called a "post code". | |
260 // | |
261 // HACK: Just for the MapQuest driving directions page we match the | |
262 // exact name "1z", which MapQuest uses to label its zip code field. | |
263 // Hopefully before long we'll be smart enough to find the zip code | |
264 // on that page automatically. | |
265 if (address_field->zip_) | |
266 return false; | |
267 | |
268 base::string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); | |
269 if (!ParseField(scanner, pattern, &address_field->zip_)) | |
270 return false; | |
271 | |
272 address_field->type_ = kGenericAddress; | |
273 // Look for a zip+4, whose field name will also often contain | |
274 // the substring "zip". | |
275 ParseField(scanner, | |
276 UTF8ToUTF16(autofill::kZip4Re), | |
277 &address_field->zip4_); | |
278 | |
279 return true; | |
280 } | |
281 | |
282 // static | |
283 bool AddressField::ParseCity(AutofillScanner* scanner, | |
284 AddressField* address_field) { | |
285 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
286 // the term "town". | |
287 if (address_field->city_) | |
288 return false; | |
289 | |
290 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
291 return ParseFieldSpecifics(scanner, | |
292 UTF8ToUTF16(autofill::kCityRe), | |
293 MATCH_DEFAULT | MATCH_SELECT, | |
294 &address_field->city_); | |
295 } | |
296 | |
297 // static | |
298 bool AddressField::ParseState(AutofillScanner* scanner, | |
299 AddressField* address_field) { | |
300 if (address_field->state_) | |
301 return false; | |
302 | |
303 return ParseFieldSpecifics(scanner, | |
304 UTF8ToUTF16(autofill::kStateRe), | |
305 MATCH_DEFAULT | MATCH_SELECT, | |
306 &address_field->state_); | |
307 } | |
308 | |
309 AddressField::AddressType AddressField::AddressTypeFromText( | |
310 const base::string16 &text) { | |
311 size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); | |
312 size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); | |
313 if (same_as != base::string16::npos || use_shipping != base::string16::npos) | |
314 // This text could be a checkbox label such as "same as my billing | |
315 // address" or "use my shipping address". | |
316 // ++ It would help if we generally skipped all text that appears | |
317 // after a check box. | |
318 return kGenericAddress; | |
319 | |
320 // Not all pages say "billing address" and "shipping address" explicitly; | |
321 // for example, Craft Catalog1.html has "Bill-to Address" and | |
322 // "Ship-to Address". | |
323 size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); | |
324 size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); | |
325 | |
326 if (bill == base::string16::npos && ship == base::string16::npos) | |
327 return kGenericAddress; | |
328 | |
329 if (bill != base::string16::npos && ship == base::string16::npos) | |
330 return kBillingAddress; | |
331 | |
332 if (bill == base::string16::npos && ship != base::string16::npos) | |
333 return kShippingAddress; | |
334 | |
335 if (bill > ship) | |
336 return kBillingAddress; | |
337 | |
338 return kShippingAddress; | |
339 } | |
340 | |
341 } // namespace autofill | |
OLD | NEW |