Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Unified Diff: components/autofill/core/browser/address_field.cc

Issue 1453193002: autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: address reviews Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/autofill.gypi ('k') | components/autofill/core/browser/autofill_regex_constants.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/autofill/core/browser/address_field.cc
diff --git a/components/autofill/core/browser/address_field.cc b/components/autofill/core/browser/address_field.cc
index bf96dc2e75860ca64d95a8be1e20ca9be5360e7a..b9948dbcdda7a76b668afd1348e20114cccf44cb 100644
--- a/components/autofill/core/browser/address_field.cc
+++ b/components/autofill/core/browser/address_field.cc
@@ -8,16 +8,12 @@
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
-#include "base/strings/string16.h"
#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/autofill_regex_constants.h"
#include "components/autofill/core/browser/autofill_scanner.h"
#include "components/autofill/core/browser/field_types.h"
-using base::UTF8ToUTF16;
-
namespace autofill {
namespace {
@@ -48,9 +44,6 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
const AutofillField* const initial_field = scanner->Cursor();
size_t saved_cursor = scanner->SaveCursor();
- base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
- base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
-
// Allow address fields to appear in any order.
size_t begin_trailing_non_labeled_fields = 0;
bool has_trailing_non_labeled_fields = false;
@@ -62,8 +55,8 @@ scoped_ptr<FormField> AddressField::Parse(AutofillScanner* scanner) {
address_field->ParseCompany(scanner)) {
has_trailing_non_labeled_fields = false;
continue;
- } else if (ParseField(scanner, attention_ignored, NULL) ||
- ParseField(scanner, region_ignored, NULL)) {
+ } else if (ParseField(scanner, kAttentionIgnoredRe, NULL) ||
+ ParseField(scanner, kRegionIgnoredRe, NULL)) {
// We ignore the following:
// * Attention.
// * Province/Region/Other.
@@ -148,7 +141,7 @@ bool AddressField::ParseCompany(AutofillScanner* scanner) {
if (company_ && !company_->IsEmpty())
return false;
- return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_);
+ return ParseField(scanner, kCompanyRe, &company_);
}
bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
@@ -164,19 +157,17 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
return false;
// Ignore "Address Lookup" field. http://crbug.com/427622
- if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), NULL))
+ if (ParseField(scanner, kAddressLookupRe, NULL))
return false;
- base::string16 pattern = UTF8ToUTF16(kAddressLine1Re);
- base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
- if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
+ if (!ParseFieldSpecifics(scanner, kAddressLine1Re, MATCH_DEFAULT,
&address1_) &&
- !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
- &street_address_) &&
- !ParseFieldSpecifics(scanner, label_pattern,
- MATCH_LABEL | MATCH_TEXT_AREA,
- &street_address_))
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe,
+ MATCH_LABEL | MATCH_TEXT, &address1_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine1Re,
+ MATCH_DEFAULT | MATCH_TEXT_AREA, &street_address_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine1LabelRe,
+ MATCH_LABEL | MATCH_TEXT_AREA, &street_address_))
return false;
if (street_address_)
@@ -185,19 +176,16 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
// This code may not pick up pages that have an address field consisting of a
// sequence of unlabeled address fields. If we need to add this, see
// discussion on https://codereview.chromium.org/741493003/
- pattern = UTF8ToUTF16(kAddressLine2Re);
- label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
- if (!ParseField(scanner, pattern, &address2_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
- &address2_))
+ if (!ParseField(scanner, kAddressLine2Re, &address2_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine2LabelRe,
+ MATCH_LABEL | MATCH_TEXT, &address2_))
return true;
// Optionally parse address line 3. This uses the same label regexp as
// address 2 above.
- pattern = UTF8ToUTF16(kAddressLinesExtraRe);
- if (!ParseField(scanner, pattern, &address3_) &&
- !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
- &address3_))
+ if (!ParseField(scanner, kAddressLinesExtraRe, &address3_) &&
+ !ParseFieldSpecifics(scanner, kAddressLine2LabelRe,
+ MATCH_LABEL | MATCH_TEXT, &address3_))
return true;
// Try for surplus lines, which we will promptly discard. Some pages have 4
@@ -205,8 +193,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
//
// Since these are rare, don't bother considering unlabeled lines as extra
// address lines.
- pattern = UTF8ToUTF16(kAddressLinesExtraRe);
- while (ParseField(scanner, pattern, NULL)) {
+ while (ParseField(scanner, kAddressLinesExtraRe, NULL)) {
// Consumed a surplus line, try for another.
}
return true;
@@ -217,9 +204,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) {
return false;
scanner->SaveCursor();
- if (ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCountryRe),
- MATCH_DEFAULT | MATCH_SELECT,
+ if (ParseFieldSpecifics(scanner, kCountryRe, MATCH_DEFAULT | MATCH_SELECT,
&country_)) {
return true;
}
@@ -227,8 +212,7 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) {
// The occasional page (e.g. google account registration page) calls this a
// "location". However, this only makes sense for select tags.
scanner->Rewind();
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCountryLocationRe),
+ return ParseFieldSpecifics(scanner, kCountryLocationRe,
MATCH_LABEL | MATCH_NAME | MATCH_SELECT,
&country_);
}
@@ -237,16 +221,13 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner) {
if (zip_)
return false;
- if (!ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kZipCodeRe),
- kZipCodeMatchType,
- &zip_)) {
+ if (!ParseFieldSpecifics(scanner, kZipCodeRe, kZipCodeMatchType, &zip_)) {
return false;
}
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
- ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_);
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_);
return true;
}
@@ -254,20 +235,21 @@ bool AddressField::ParseCity(AutofillScanner* scanner) {
if (city_)
return false;
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kCityRe),
- kCityMatchType,
- &city_);
+ return ParseFieldSpecifics(scanner, kCityRe, kCityMatchType, &city_);
}
bool AddressField::ParseState(AutofillScanner* scanner) {
if (state_)
return false;
- return ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kStateRe),
- kStateMatchType,
- &state_);
+ // Ignore spurious matches for "United States".
Evan Stade 2015/12/08 19:34:56 Where is this coming from? Is this a new addition?
Ilya Sherman 2015/12/08 20:02:28 It's coming from a change to one of the regexes, w…
+ size_t saved_cursor = scanner->SaveCursor();
+ if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) {
+ scanner->RewindTo(saved_cursor);
+ return false;
+ }
+
+ return ParseFieldSpecifics(scanner, kStateRe, kStateMatchType, &state_);
}
bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) {
@@ -329,7 +311,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
return RESULT_MATCH_NONE;
ParseNameLabelResult result = ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_);
+ scanner, kZipCodeRe, kZipCodeMatchType, &zip_);
if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd())
return result;
@@ -349,10 +331,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
if (!found_non_zip4) {
// Look for a zip+4, whose field name will also often contain
// the substring "zip".
- ParseFieldSpecifics(scanner,
- UTF8ToUTF16(kZip4Re),
- kZipCodeMatchType,
- &zip4_);
+ ParseFieldSpecifics(scanner, kZip4Re, kZipCodeMatchType, &zip4_);
}
return result;
}
@@ -362,8 +341,7 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity(
if (city_)
return RESULT_MATCH_NONE;
- return ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kCityRe), kCityMatchType, &city_);
+ return ParseNameAndLabelSeparately(scanner, kCityRe, kCityMatchType, &city_);
}
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
@@ -371,8 +349,14 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
if (state_)
return RESULT_MATCH_NONE;
- return ParseNameAndLabelSeparately(
- scanner, UTF8ToUTF16(kStateRe), kStateMatchType, &state_);
+ size_t saved_cursor = scanner->SaveCursor();
+ if (ParseFieldSpecifics(scanner, "United States", kStateMatchType, nullptr)) {
+ scanner->RewindTo(saved_cursor);
+ return RESULT_MATCH_NONE;
+ }
+
+ return ParseNameAndLabelSeparately(scanner, kStateRe, kStateMatchType,
+ &state_);
}
} // namespace autofill
« no previous file with comments | « components/autofill.gypi ('k') | components/autofill/core/browser/autofill_regex_constants.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698