| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "components/autofill/browser/address_field.h" | |
| 6 | |
| 7 #include <stddef.h> | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "base/memory/scoped_ptr.h" | |
| 11 #include "base/strings/string16.h" | |
| 12 #include "base/strings/string_util.h" | |
| 13 #include "base/strings/utf_string_conversions.h" | |
| 14 #include "components/autofill/browser/autofill_field.h" | |
| 15 #include "components/autofill/browser/autofill_regex_constants.h" | |
| 16 #include "components/autofill/browser/autofill_scanner.h" | |
| 17 #include "components/autofill/browser/field_types.h" | |
| 18 #include "ui/base/l10n/l10n_util.h" | |
| 19 | |
| 20 namespace autofill { | |
| 21 | |
| 22 FormField* AddressField::Parse(AutofillScanner* scanner) { | |
| 23 if (scanner->IsEnd()) | |
| 24 return NULL; | |
| 25 | |
| 26 scoped_ptr<AddressField> address_field(new AddressField); | |
| 27 const AutofillField* const initial_field = scanner->Cursor(); | |
| 28 size_t saved_cursor = scanner->SaveCursor(); | |
| 29 | |
| 30 base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); | |
| 31 base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); | |
| 32 | |
| 33 // Allow address fields to appear in any order. | |
| 34 size_t begin_trailing_non_labeled_fields = 0; | |
| 35 bool has_trailing_non_labeled_fields = false; | |
| 36 while (!scanner->IsEnd()) { | |
| 37 const size_t cursor = scanner->SaveCursor(); | |
| 38 if (ParseAddressLines(scanner, address_field.get()) || | |
| 39 ParseCity(scanner, address_field.get()) || | |
| 40 ParseState(scanner, address_field.get()) || | |
| 41 ParseZipCode(scanner, address_field.get()) || | |
| 42 ParseCountry(scanner, address_field.get()) || | |
| 43 ParseCompany(scanner, address_field.get())) { | |
| 44 has_trailing_non_labeled_fields = false; | |
| 45 continue; | |
| 46 } else if (ParseField(scanner, attention_ignored, NULL) || | |
| 47 ParseField(scanner, region_ignored, NULL)) { | |
| 48 // We ignore the following: | |
| 49 // * Attention. | |
| 50 // * Province/Region/Other. | |
| 51 continue; | |
| 52 } else if (scanner->Cursor() != initial_field && | |
| 53 ParseEmptyLabel(scanner, NULL)) { | |
| 54 // Ignore non-labeled fields within an address; the page | |
| 55 // MapQuest Driving Directions North America.html contains such a field. | |
| 56 // We only ignore such fields after we've parsed at least one other field; | |
| 57 // otherwise we'd effectively parse address fields before other field | |
| 58 // types after any non-labeled fields, and we want email address fields to | |
| 59 // have precedence since some pages contain fields labeled | |
| 60 // "Email address". | |
| 61 if (!has_trailing_non_labeled_fields) { | |
| 62 has_trailing_non_labeled_fields = true; | |
| 63 begin_trailing_non_labeled_fields = cursor; | |
| 64 } | |
| 65 | |
| 66 continue; | |
| 67 } else { | |
| 68 // No field found. | |
| 69 break; | |
| 70 } | |
| 71 } | |
| 72 | |
| 73 // If we have identified any address fields in this field then it should be | |
| 74 // added to the list of fields. | |
| 75 if (address_field->company_ != NULL || | |
| 76 address_field->address1_ != NULL || address_field->address2_ != NULL || | |
| 77 address_field->city_ != NULL || address_field->state_ != NULL || | |
| 78 address_field->zip_ != NULL || address_field->zip4_ || | |
| 79 address_field->country_ != NULL) { | |
| 80 // Don't slurp non-labeled fields at the end into the address. | |
| 81 if (has_trailing_non_labeled_fields) | |
| 82 scanner->RewindTo(begin_trailing_non_labeled_fields); | |
| 83 | |
| 84 address_field->type_ = address_field->FindType(); | |
| 85 return address_field.release(); | |
| 86 } | |
| 87 | |
| 88 scanner->RewindTo(saved_cursor); | |
| 89 return NULL; | |
| 90 } | |
| 91 | |
| 92 AddressField::AddressType AddressField::FindType() const { | |
| 93 // First look at the field name, which itself will sometimes contain | |
| 94 // "bill" or "ship". | |
| 95 if (company_) { | |
| 96 base::string16 name = StringToLowerASCII(company_->name); | |
| 97 return AddressTypeFromText(name); | |
| 98 } | |
| 99 if (address1_) { | |
| 100 base::string16 name = StringToLowerASCII(address1_->name); | |
| 101 return AddressTypeFromText(name); | |
| 102 } | |
| 103 if (address2_) { | |
| 104 base::string16 name = StringToLowerASCII(address2_->name); | |
| 105 return AddressTypeFromText(name); | |
| 106 } | |
| 107 if (city_) { | |
| 108 base::string16 name = StringToLowerASCII(city_->name); | |
| 109 return AddressTypeFromText(name); | |
| 110 } | |
| 111 if (zip_) { | |
| 112 base::string16 name = StringToLowerASCII(zip_->name); | |
| 113 return AddressTypeFromText(name); | |
| 114 } | |
| 115 if (state_) { | |
| 116 base::string16 name = StringToLowerASCII(state_->name); | |
| 117 return AddressTypeFromText(name); | |
| 118 } | |
| 119 if (country_) { | |
| 120 base::string16 name = StringToLowerASCII(country_->name); | |
| 121 return AddressTypeFromText(name); | |
| 122 } | |
| 123 | |
| 124 return kGenericAddress; | |
| 125 } | |
| 126 | |
| 127 AddressField::AddressField() | |
| 128 : company_(NULL), | |
| 129 address1_(NULL), | |
| 130 address2_(NULL), | |
| 131 city_(NULL), | |
| 132 state_(NULL), | |
| 133 zip_(NULL), | |
| 134 zip4_(NULL), | |
| 135 country_(NULL), | |
| 136 type_(kGenericAddress) { | |
| 137 } | |
| 138 | |
| 139 bool AddressField::ClassifyField(FieldTypeMap* map) const { | |
| 140 AutofillFieldType address_company; | |
| 141 AutofillFieldType address_line1; | |
| 142 AutofillFieldType address_line2; | |
| 143 AutofillFieldType address_city; | |
| 144 AutofillFieldType address_state; | |
| 145 AutofillFieldType address_zip; | |
| 146 AutofillFieldType address_country; | |
| 147 | |
| 148 switch (type_) { | |
| 149 case kShippingAddress: | |
| 150 // Fall through. Autofill does not support shipping addresses. | |
| 151 case kGenericAddress: | |
| 152 address_company = COMPANY_NAME; | |
| 153 address_line1 = ADDRESS_HOME_LINE1; | |
| 154 address_line2 = ADDRESS_HOME_LINE2; | |
| 155 address_city = ADDRESS_HOME_CITY; | |
| 156 address_state = ADDRESS_HOME_STATE; | |
| 157 address_zip = ADDRESS_HOME_ZIP; | |
| 158 address_country = ADDRESS_HOME_COUNTRY; | |
| 159 break; | |
| 160 | |
| 161 case kBillingAddress: | |
| 162 address_company = COMPANY_NAME; | |
| 163 address_line1 = ADDRESS_BILLING_LINE1; | |
| 164 address_line2 = ADDRESS_BILLING_LINE2; | |
| 165 address_city = ADDRESS_BILLING_CITY; | |
| 166 address_state = ADDRESS_BILLING_STATE; | |
| 167 address_zip = ADDRESS_BILLING_ZIP; | |
| 168 address_country = ADDRESS_BILLING_COUNTRY; | |
| 169 break; | |
| 170 | |
| 171 default: | |
| 172 NOTREACHED(); | |
| 173 return false; | |
| 174 } | |
| 175 | |
| 176 bool ok = AddClassification(company_, address_company, map); | |
| 177 ok = ok && AddClassification(address1_, address_line1, map); | |
| 178 ok = ok && AddClassification(address2_, address_line2, map); | |
| 179 ok = ok && AddClassification(city_, address_city, map); | |
| 180 ok = ok && AddClassification(state_, address_state, map); | |
| 181 ok = ok && AddClassification(zip_, address_zip, map); | |
| 182 ok = ok && AddClassification(country_, address_country, map); | |
| 183 return ok; | |
| 184 } | |
| 185 | |
| 186 // static | |
| 187 bool AddressField::ParseCompany(AutofillScanner* scanner, | |
| 188 AddressField* address_field) { | |
| 189 if (address_field->company_ && !address_field->company_->IsEmpty()) | |
| 190 return false; | |
| 191 | |
| 192 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), | |
| 193 &address_field->company_); | |
| 194 } | |
| 195 | |
| 196 // static | |
| 197 bool AddressField::ParseAddressLines(AutofillScanner* scanner, | |
| 198 AddressField* address_field) { | |
| 199 // We only match the string "address" in page text, not in element names, | |
| 200 // because sometimes every element in a group of address fields will have | |
| 201 // a name containing the string "address"; for example, on the page | |
| 202 // Kohl's - Register Billing Address.html the text element labeled "city" | |
| 203 // has the name "BILL_TO_ADDRESS<>city". We do match address labels | |
| 204 // such as "address1", which appear as element names on various pages (eg | |
| 205 // AmericanGirl-Registration.html, BloomingdalesBilling.html, | |
| 206 // EBay Registration Enter Information.html). | |
| 207 if (address_field->address1_) | |
| 208 return false; | |
| 209 | |
| 210 base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); | |
| 211 base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); | |
| 212 | |
| 213 if (!ParseField(scanner, pattern, &address_field->address1_) && | |
| 214 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
| 215 &address_field->address1_)) { | |
| 216 return false; | |
| 217 } | |
| 218 | |
| 219 // Optionally parse more address lines, which may have empty labels. | |
| 220 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) | |
| 221 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! | |
| 222 pattern = UTF8ToUTF16(autofill::kAddressLine2Re); | |
| 223 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); | |
| 224 if (!ParseEmptyLabel(scanner, &address_field->address2_) && | |
| 225 !ParseField(scanner, pattern, &address_field->address2_)) { | |
| 226 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, | |
| 227 &address_field->address2_); | |
| 228 } | |
| 229 | |
| 230 // Try for surplus lines, which we will promptly discard. | |
| 231 if (address_field->address2_ != NULL) { | |
| 232 pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe); | |
| 233 while (ParseField(scanner, pattern, NULL)) { | |
| 234 // Consumed a surplus line, try for another. | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 return true; | |
| 239 } | |
| 240 | |
| 241 // static | |
| 242 bool AddressField::ParseCountry(AutofillScanner* scanner, | |
| 243 AddressField* address_field) { | |
| 244 // Parse a country. The occasional page (e.g. | |
| 245 // Travelocity_New Member Information1.html) calls this a "location". | |
| 246 if (address_field->country_ && !address_field->country_->IsEmpty()) | |
| 247 return false; | |
| 248 | |
| 249 return ParseFieldSpecifics(scanner, | |
| 250 UTF8ToUTF16(autofill::kCountryRe), | |
| 251 MATCH_DEFAULT | MATCH_SELECT, | |
| 252 &address_field->country_); | |
| 253 } | |
| 254 | |
| 255 // static | |
| 256 bool AddressField::ParseZipCode(AutofillScanner* scanner, | |
| 257 AddressField* address_field) { | |
| 258 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this | |
| 259 // is called a "post code". | |
| 260 // | |
| 261 // HACK: Just for the MapQuest driving directions page we match the | |
| 262 // exact name "1z", which MapQuest uses to label its zip code field. | |
| 263 // Hopefully before long we'll be smart enough to find the zip code | |
| 264 // on that page automatically. | |
| 265 if (address_field->zip_) | |
| 266 return false; | |
| 267 | |
| 268 base::string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); | |
| 269 if (!ParseField(scanner, pattern, &address_field->zip_)) | |
| 270 return false; | |
| 271 | |
| 272 address_field->type_ = kGenericAddress; | |
| 273 // Look for a zip+4, whose field name will also often contain | |
| 274 // the substring "zip". | |
| 275 ParseField(scanner, | |
| 276 UTF8ToUTF16(autofill::kZip4Re), | |
| 277 &address_field->zip4_); | |
| 278 | |
| 279 return true; | |
| 280 } | |
| 281 | |
| 282 // static | |
| 283 bool AddressField::ParseCity(AutofillScanner* scanner, | |
| 284 AddressField* address_field) { | |
| 285 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use | |
| 286 // the term "town". | |
| 287 if (address_field->city_) | |
| 288 return false; | |
| 289 | |
| 290 // Select fields are allowed here. This occurs on top-100 site rediff.com. | |
| 291 return ParseFieldSpecifics(scanner, | |
| 292 UTF8ToUTF16(autofill::kCityRe), | |
| 293 MATCH_DEFAULT | MATCH_SELECT, | |
| 294 &address_field->city_); | |
| 295 } | |
| 296 | |
| 297 // static | |
| 298 bool AddressField::ParseState(AutofillScanner* scanner, | |
| 299 AddressField* address_field) { | |
| 300 if (address_field->state_) | |
| 301 return false; | |
| 302 | |
| 303 return ParseFieldSpecifics(scanner, | |
| 304 UTF8ToUTF16(autofill::kStateRe), | |
| 305 MATCH_DEFAULT | MATCH_SELECT, | |
| 306 &address_field->state_); | |
| 307 } | |
| 308 | |
| 309 AddressField::AddressType AddressField::AddressTypeFromText( | |
| 310 const base::string16 &text) { | |
| 311 size_t same_as = text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)); | |
| 312 size_t use_shipping = text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)); | |
| 313 if (same_as != base::string16::npos || use_shipping != base::string16::npos) | |
| 314 // This text could be a checkbox label such as "same as my billing | |
| 315 // address" or "use my shipping address". | |
| 316 // ++ It would help if we generally skipped all text that appears | |
| 317 // after a check box. | |
| 318 return kGenericAddress; | |
| 319 | |
| 320 // Not all pages say "billing address" and "shipping address" explicitly; | |
| 321 // for example, Craft Catalog1.html has "Bill-to Address" and | |
| 322 // "Ship-to Address". | |
| 323 size_t bill = text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe)); | |
| 324 size_t ship = text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe)); | |
| 325 | |
| 326 if (bill == base::string16::npos && ship == base::string16::npos) | |
| 327 return kGenericAddress; | |
| 328 | |
| 329 if (bill != base::string16::npos && ship == base::string16::npos) | |
| 330 return kBillingAddress; | |
| 331 | |
| 332 if (bill == base::string16::npos && ship != base::string16::npos) | |
| 333 return kShippingAddress; | |
| 334 | |
| 335 if (bill > ship) | |
| 336 return kBillingAddress; | |
| 337 | |
| 338 return kShippingAddress; | |
| 339 } | |
| 340 | |
| 341 } // namespace autofill | |
| OLD | NEW |