OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/autofill/address_field.h" | |
6 | |
7 #include <stddef.h> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/memory/scoped_ptr.h" | |
11 #include "base/string16.h" | |
12 #include "base/string_util.h" | |
13 #include "base/utf_string_conversions.h" | |
14 #include "chrome/browser/autofill/autofill_field.h" | |
15 #include "chrome/browser/autofill/autofill_regex_constants.h" | |
16 #include "chrome/browser/autofill/autofill_scanner.h" | |
17 #include "chrome/browser/autofill/field_types.h" | |
18 #include "ui/base/l10n/l10n_util.h" | |
19 | |
20 FormField* AddressField::Parse(AutofillScanner* scanner) { | |
21 if (scanner->IsEnd()) | |
22 return NULL; | |
23 | |
24 scoped_ptr<AddressField> address_field(new AddressField); | |
25 const AutofillField* const initial_field = scanner->Cursor(); | |
26 size_t saved_cursor = scanner->SaveCursor(); | |
27 | |
28 string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); | |
29 string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); | |
30 | |
31 // Allow address fields to appear in any order. | |
32 size_t begin_trailing_non_labeled_fields = 0; | |
33 bool has_trailing_non_labeled_fields = false; | |
34 while (!scanner->IsEnd()) { | |
35 const size_t cursor = scanner->SaveCursor(); | |
36 if (ParseAddressLines(scanner, address_field.get()) || | |
37 ParseCity(scanner, address_field.get()) || | |
38 ParseState(scanner, address_field.get()) || | |
39 ParseZipCode(scanner, address_field.get()) || | |
40 ParseCountry(scanner, address_field.get()) || | |
41 ParseCompany(scanner, address_field.get())) { | |
42 has_trailing_non_labeled_fields = false; | |
43 continue; | |
44 } else if (ParseField(scanner, attention_ignored, NULL) || | |
45 ParseField(scanner, region_ignored, NULL)) { | |
46 // We ignore the following: | |
47 // * Attention. | |
48 // * Province/Region/Other. | |
49 continue; | |
50 } else if (scanner->Cursor() != initial_field && | |
51 ParseEmptyLabel(scanner, NULL)) { | |
52 // Ignore non-labeled fields within an address; the page | |
53 // MapQuest Driving Directions North America.html contains such a field. | |
54 // We only ignore such fields after we've parsed at least one other field; | |
55 // otherwise we'd effectively parse address fields before other field | |
56 // types after any non-labeled fields, and we want email address fields to | |
57 // have precedence since some pages contain fields labeled | |
58 // "Email address". | |
59 if (!has_trailing_non_labeled_fields) { | |
60 has_trailing_non_labeled_fields = true; | |
61 begin_trailing_non_labeled_fields = cursor; | |
62 } | |
63 | |
64 continue; | |
65 } else { | |
66 // No field found. | |
67 break; | |
68 } | |
69 } | |
70 | |
71 // If we have identified any address fields in this field then it should be | |
72 // added to the list of fields. | |
73 if (address_field->company_ != NULL || | |
74 address_field->address1_ != NULL || address_field->address2_ != NULL || | |
75 address_field->city_ != NULL || address_field->state_ != NULL || | |
76 address_field->zip_ != NULL || address_field->zip4_ || | |
77 address_field->country_ != NULL) { | |
78 // Don't slurp non-labeled fields at the end into the address. | |
79 if (has_trailing_non_labeled_fields) | |
80 scanner->RewindTo(begin_trailing_non_labeled_fields); | |
81 | |
82 address_field->type_ = address_field->FindType(); | |
83 return address_field.release(); | |
84 } | |
85 | |
86 scanner->RewindTo(saved_cursor); | |
87 return NULL; | |
88 } | |
89 | |
90 AddressField::AddressType AddressField::FindType() const { | |
91 // First look at the field name, which itself will sometimes contain | |
92 // "bill" or "ship". | |
93 if (company_) { | |
94 string16 name = StringToLowerASCII(company_->name); | |
95 return AddressTypeFromText(name); | |
96 } | |
97 if (address1_) { | |
98 string16 name = StringToLowerASCII(address1_->name); | |
99 return AddressTypeFromText(name); | |
100 } | |
101 if (address2_) { | |
102 string16 name = StringToLowerASCII(address2_->name); | |
103 return AddressTypeFromText(name); | |
104 } | |
105 if (city_) { | |
106 string16 name = StringToLowerASCII(city_->name); | |
107 return AddressTypeFromText(name); | |
108 } | |
109 if (zip_) { | |
110 string16 name = StringToLowerASCII(zip_->name); | |
111 return AddressTypeFromText(name); | |
112 } | |
113 if (state_) { | |
114 string16 name = StringToLowerASCII(state_->name); | |
115 return AddressTypeFromText(name); | |
116 } | |
117 if (country_) { | |
118 string16 name = StringToLowerASCII(country_->name); | |
119 return AddressTypeFromText(name); | |
120 } | |
121 | |
122 return kGenericAddress; | |
123 } | |
124 | |
125 AddressField::AddressField() | |
126 : company_(NULL), | |
127 address1_(NULL), | |
128 address2_(NULL), | |
129 city_(NULL), | |
130 state_(NULL), | |
131 zip_(NULL), | |
132 zip4_(NULL), | |
133 country_(NULL), | |
134 type_(kGenericAddress) { | |
135 } | |
136 | |
137 bool AddressField::ClassifyField(FieldTypeMap* map) const { | |
138 AutofillFieldType address_company; | |
139 AutofillFieldType address_line1; | |
140 AutofillFieldType address_line2; | |
141 AutofillFieldType address_city; | |
142 AutofillFieldType address_state; | |
143 AutofillFieldType address_zip; | |
144 AutofillFieldType address_country; | |
145 | |
146 switch (type_) { | |
147 case kShippingAddress: | |
148 // Fall through. Autofill does not support shipping addresses. | |
149 case kGenericAddress: | |
150 address_company = COMPANY_NAME; | |
151 address_line1 = ADDRESS_HOME_LINE1; | |
152 address_line2 = ADDRESS_HOME_LINE2; | |
153 address_city = ADDRESS_HOME_CITY; | |
154 address_state = ADDRESS_HOME_STATE; | |
155 address_zip = ADDRESS_HOME_ZIP; | |
156 address_country = ADDRESS_HOME_COUNTRY; | |
157 break; | |
158 | |
159 case kBillingAddress: | |
160 address_company = COMPANY_NAME; | |
161 address_line1 = ADDRESS_BILLING_LINE1; | |
162 address_line2 = ADDRESS_BILLING_LINE2; | |
163 address_city = ADDRESS_BILLING_CITY; | |
164 address_state = ADDRESS_BILLING_STATE; | |
165 address_zip = ADDRESS_BILLING_ZIP; | |
166 address_country = ADDRESS_BILLING_COUNTRY; | |
167 break; | |
168 | |
169 default: | |
170 NOTREACHED(); | |
171 return false; | |
172 } | |
173 | |
174 bool ok = AddClassification(company_, address_company, map); | |
175 ok = ok && AddClassification(address1_, address_line1, map); | |
176 ok = ok && AddClassification(address2_, address_line2, map); | |
177 ok = ok && AddClassification(city_, address_city, map); | |
178 ok = ok && AddClassification(state_, address_state, map); | |
179 ok = ok && AddClassification(zip_, address_zip, map); | |
180 ok = ok && AddClassification(country_, address_country, map); | |
181 return ok; | |
182 } | |
183 | |
184 // static | |
185 bool AddressField::ParseCompany(AutofillScanner* scanner, | |
186 AddressField* address_field) { | |
187 if (address_field->company_ && !address_field->company_->IsEmpty()) | |
188 return false; | |
189 | |
190 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), | |
191 &address_field->company_); | |
192 } | |
193 | |
194 // static | |
195 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | |
196 AddressField* address_field) { | |
197 // We only match the string "address" in page text, not in element names, | |
198 // because sometimes every element in a group of address fields will have | |
199 // a name containing the string "address"; for example, on the page | |
200 // Kohl's - Register Billing Address.html the text element labeled "city" | |
201 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | |
202 // such as "address1", which appear as element names on various pages (eg | |
203 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | |
204 // EBay Registration Enter Information.html). | |
205 if (address_field->address1_) | |
206 return false; | |
207 | |
208 string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); | |
209 string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); | |
210 | |
211 if (!ParseField(scanner, pattern, &address_field->address1_) && | |
212 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
213 &address_field->address1_)) { | |
214 return false; | |
215 } | |
216 | |
217 // Optionally parse more address lines, which may have empty labels. | |
218 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | |
219 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | |
220 pattern = UTF8ToUTF16(autofill::kAddressLine2Re); | |
221 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); | |
222 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
223 !ParseField(scanner, pattern, &address_field->address2_)) { | |
224 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
225 &address_field->address2_); | |
226 } | |
227 | |
228 // Try for a third line, which we will promptly discard. | |
229 if (address_field->address2_ != NULL) { | |
230 pattern = UTF8ToUTF16(autofill::kAddressLine3Re); | |
231 ParseField(scanner, pattern, NULL); | |
232 } | |
233 | |
234 return true; | |
235 } | |
236 | |
237 // static | |
238 bool AddressField::ParseCountry(AutofillScanner* scanner, | |
239 AddressField* address_field) { | |
240 // Parse a country. The occasional page (e.g. | |
241 // Travelocity_New Member Information1.html) calls this a "location". | |
242 if (address_field->country_ && !address_field->country_->IsEmpty()) | |
243 return false; | |
244 | |
245 return ParseFieldSpecifics(scanner, | |
246 UTF8ToUTF16(autofill::kCountryRe), | |
247 MATCH_DEFAULT | MATCH_SELECT, | |
248 &address_field->country_); | |
249 } | |
250 | |
251 // static | |
252 bool AddressField::ParseZipCode(AutofillScanner* scanner, | |
253 AddressField* address_field) { | |
254 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
255 // is called a "post code". | |
256 // | |
257 // HACK: Just for the MapQuest driving directions page we match the | |
258 // exact name "1z", which MapQuest uses to label its zip code field. | |
259 // Hopefully before long we'll be smart enough to find the zip code | |
260 // on that page automatically. | |
261 if (address_field->zip_) | |
262 return false; | |
263 | |
264 string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); | |
265 if (!ParseField(scanner, pattern, &address_field->zip_)) | |
266 return false; | |
267 | |
268 address_field->type_ = kGenericAddress; | |
269 // Look for a zip+4, whose field name will also often contain | |
270 // the substring "zip". | |
271 ParseField(scanner, | |
272 UTF8ToUTF16(autofill::kZip4Re), | |
273 &address_field->zip4_); | |
274 | |
275 return true; | |
276 } | |
277 | |
278 // static | |
279 bool AddressField::ParseCity(AutofillScanner* scanner, | |
280 AddressField* address_field) { | |
281 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
282 // the term "town". | |
283 if (address_field->city_) | |
284 return false; | |
285 | |
286 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
287 return ParseFieldSpecifics(scanner, | |
288 UTF8ToUTF16(autofill::kCityRe), | |
289 MATCH_DEFAULT | MATCH_SELECT, | |
290 &address_field->city_); | |
291 } | |
292 | |
293 // static | |
294 bool AddressField::ParseState(AutofillScanner* scanner, | |
295 AddressField* address_field) { | |
296 if (address_field->state_) | |
297 return false; | |
298 | |
299 return ParseFieldSpecifics(scanner, | |
300 UTF8ToUTF16(autofill::kStateRe), | |
301 MATCH_DEFAULT | MATCH_SELECT, | |
302 &address_field->state_); | |
303 } | |
304 | |
305 AddressField::AddressType AddressField::AddressTypeFromText( | |
306 const string16 &text) { | |
307 size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); | |
308 size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); | |
309 if (same_as != string16::npos || use_shipping != string16::npos) | |
310 // This text could be a checkbox label such as "same as my billing | |
311 // address" or "use my shipping address". | |
312 // ++ It would help if we generally skipped all text that appears | |
313 // after a check box. | |
314 return kGenericAddress; | |
315 | |
316 // Not all pages say "billing address" and "shipping address" explicitly; | |
317 // for example, Craft Catalog1.html has "Bill-to Address" and | |
318 // "Ship-to Address". | |
319 size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); | |
320 size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); | |
321 | |
322 if (bill == string16::npos && ship == string16::npos) | |
323 return kGenericAddress; | |
324 | |
325 if (bill != string16::npos && ship == string16::npos) | |
326 return kBillingAddress; | |
327 | |
328 if (bill == string16::npos && ship != string16::npos) | |
329 return kShippingAddress; | |
330 | |
331 if (bill > ship) | |
332 return kBillingAddress; | |
333 | |
334 return kShippingAddress; | |
335 } | |
OLD | NEW |