Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(355)

Side by Side Diff: components/autofill/core/browser/autofill_regex_constants.cc

Issue 1453193002: autofill: switch autofill_regexes to RE2 library (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: address reviews Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // This file contains UTF8 strings that we want as char arrays. To avoid 5 // This file contains UTF8 strings that we want as char arrays. To avoid
6 // differences between compilers, we use a script to convert the UTF8 strings into 6 // differences between compilers, we use a script to convert the UTF8 strings into
7 // numeric literals (\x##). 7 // numeric literals (\x##).
8 8
9 #include "components/autofill/core/browser/autofill_regex_constants.h" 9 #include "components/autofill/core/browser/autofill_regex_constants.h"
10 10
11 // This macro works around the fact that the RE2 library only supports ASCII
12 // word boundaries; it is intended to behave the same as \b.
13 #define WORDBREAK "(\\A|\\z|\\PL)"
14
11 namespace autofill { 15 namespace autofill {
12 16
13 ///////////////////////////////////////////////////////////////////////////// 17 /////////////////////////////////////////////////////////////////////////////
14 // address_field.cc 18 // address_field.cc
15 ///////////////////////////////////////////////////////////////////////////// 19 /////////////////////////////////////////////////////////////////////////////
16 const char kAttentionIgnoredRe[] = "attention|attn"; 20 const char kAttentionIgnoredRe[] = "attention|attn";
17 const char kRegionIgnoredRe[] = 21 const char kRegionIgnoredRe[] =
18 "province|region|other" 22 "province|region|other"
19 "|provincia" // es 23 "|provincia" // es
20 "|bairro|suburb"; // pt-BR, pt-PT 24 "|bairro|suburb"; // pt-BR, pt-PT
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 "|país|pais" // es 81 "|país|pais" // es
78 "|国" // ja-JP 82 "|国" // ja-JP
79 "|国家" // zh-CN 83 "|国家" // zh-CN
80 "|국가|나라"; // ko-KR 84 "|국가|나라"; // ko-KR
81 const char kCountryLocationRe[] = 85 const char kCountryLocationRe[] =
82 "location"; 86 "location";
83 const char kZipCodeRe[] = 87 const char kZipCodeRe[] =
84 "zip|postal|post.*code|pcode" 88 "zip|postal|post.*code|pcode"
85 "|pin.?code" // en-IN 89 "|pin.?code" // en-IN
86 "|postleitzahl" // de-DE 90 "|postleitzahl" // de-DE
87 "|\\bcp\\b" // es 91 "|" WORDBREAK "cp" WORDBREAK // es
88 "|\\bcdp\\b" // fr-FR 92 "|" WORDBREAK "cdp" WORDBREAK // fr-FR
89 "|\\bcap\\b" // it-IT 93 "|" WORDBREAK "cap" WORDBREAK // it-IT
90 "|郵便番号" // ja-JP 94 "|郵便番号" // ja-JP
91 "|codigo|codpos|\\bcep\\b" // pt-BR, pt-PT 95 "|codigo|codpos|" WORDBREAK "cep" WORDBREAK // pt-BR, pt-PT
92 "|Почтовый.?Индекс" // ru 96 "|Почтовый.?Индекс" // ru
93 "|邮政编码|邮编" // zh-CN 97 "|邮政编码|邮编" // zh-CN
94 "|郵遞區號" // zh-TW 98 "|郵遞區號" // zh-TW
95 "|우편.?번호"; // ko-KR 99 "|우편.?번호"; // ko-KR
96 const char kZip4Re[] = 100 const char kZip4Re[] =
97 "zip|^-$|post2" 101 "zip|^-$|post2"
98 "|codpos2"; // pt-BR, pt-PT 102 "|codpos2"; // pt-BR, pt-PT
99 const char kCityRe[] = 103 const char kCityRe[] =
100 "city|town" 104 "city|town"
101 "|\\bort\\b|stadt" // de-DE 105 "|" WORDBREAK "ort" WORDBREAK "|stadt" // de-DE
102 "|suburb" // en-AU 106 "|suburb" // en-AU
103 "|ciudad|provincia|localidad|poblacion" // es 107 "|ciudad|provincia|localidad|poblacion" // es
104 "|ville|commune" // fr-FR 108 "|ville|commune" // fr-FR
105 "|localita" // it-IT 109 "|localita" // it-IT
106 "|市区町村" // ja-JP 110 "|市区町村" // ja-JP
107 "|cidade" // pt-BR, pt-PT 111 "|cidade" // pt-BR, pt-PT
108 "|Город" // ru 112 "|Город" // ru
109 "|市" // zh-CN 113 "|市" // zh-CN
110 "|分區" // zh-TW 114 "|分區" // zh-TW
111 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR 115 "|^시[^도·・]|시[·・]?군[·・]?구"; // ko-KR
112 const char kStateRe[] = 116 const char kStateRe[] =
113 "(?<!united )state|county|region|province" 117 "state|county|region|province"
114 "|land" // de-DE 118 "|land" // de-DE
115 "|county|principality" // en-UK 119 "|county|principality" // en-UK
116 "|都道府県" // ja-JP 120 "|都道府県" // ja-JP
117 "|estado|provincia" // pt-BR, pt-PT 121 "|estado|provincia" // pt-BR, pt-PT
118 "|область" // ru 122 "|область" // ru
119 "|省" // zh-CN 123 "|省" // zh-CN
120 "|地區" // zh-TW 124 "|地區" // zh-TW
121 "|^시[·・]?도"; // ko-KR 125 "|^시[·・]?도"; // ko-KR
122 126
123 ///////////////////////////////////////////////////////////////////////////// 127 /////////////////////////////////////////////////////////////////////////////
124 // credit_card_field.cc 128 // credit_card_field.cc
125 ///////////////////////////////////////////////////////////////////////////// 129 /////////////////////////////////////////////////////////////////////////////
126 const char kNameOnCardRe[] = 130 const char kNameOnCardRe[] =
127 "card.?(holder|owner)|name.*\\bon\\b.*card|(card|cc).?name|cc.?full.?name" 131 "card.?(holder|owner)|name.*" WORDBREAK "on" WORDBREAK ".*card"
132 "|(card|cc).?name|cc.?full.?name"
128 "|karteninhaber" // de-DE 133 "|karteninhaber" // de-DE
129 "|nombre.*tarjeta" // es 134 "|nombre.*tarjeta" // es
130 "|nom.*carte" // fr-FR 135 "|nom.*carte" // fr-FR
131 "|nome.*cart" // it-IT 136 "|nome.*cart" // it-IT
132 "|名前" // ja-JP 137 "|名前" // ja-JP
133 "|Имя.*карты" // ru 138 "|Имя.*карты" // ru
134 "|信用卡开户名|开户名|持卡人姓名" // zh-CN 139 "|信用卡开户名|开户名|持卡人姓名" // zh-CN
135 "|持卡人姓名"; // zh-TW 140 "|持卡人姓名"; // zh-TW
136 const char kNameOnCardContextualRe[] = 141 const char kNameOnCardContextualRe[] =
137 "name"; 142 "name";
138 const char kCardNumberRe[] = 143 const char kCardNumberRe[] =
139 "(card|cc|acct).?(number|#|no|num)" 144 "(card|cc|acct).?(number|#|no|num)"
140 "|nummer" // de-DE 145 "|nummer" // de-DE
141 "|credito|numero|número" // es 146 "|credito|numero|número" // es
142 "|numéro" // fr-FR 147 "|numéro" // fr-FR
143 "|カード番号" // ja-JP 148 "|カード番号" // ja-JP
144 "|Номер.*карты" // ru 149 "|Номер.*карты" // ru
145 "|信用卡号|信用卡号码" // zh-CN 150 "|信用卡号|信用卡号码" // zh-CN
146 "|信用卡卡號" // zh-TW 151 "|信用卡卡號" // zh-TW
147 "|카드"; // ko-KR 152 "|카드"; // ko-KR
148 const char kCardCvcRe[] = 153 const char kCardCvcRe[] =
149 "verification|card identification|security code|card code" 154 "verification|card identification|security code|card code"
150 "|cvn|cvv|cvc|csc|cvd|cid|ccv" 155 "|cvn|cvv|cvc|csc|cvd|cid|ccv"
151 "|\\bcid\\b"; 156 "|" WORDBREAK "cid" WORDBREAK;
152 157
153 // "Expiration date" is the most common label here, but some pages have 158 // "Expiration date" is the most common label here, but some pages have
154 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look 159 // "Expires", "exp. date" or "exp. month" and "exp. year". We also look
155 // for the field names ccmonth and ccyear, which appear on at least 4 of 160 // for the field names ccmonth and ccyear, which appear on at least 4 of
156 // our test pages. 161 // our test pages.
157 162
158 // On at least one page (The China Shop2.html) we find only the labels 163 // On at least one page (The China Shop2.html) we find only the labels
159 // "month" and "year". So for now we match these words directly; we'll 164 // "month" and "year". So for now we match these words directly; we'll
160 // see if this turns out to be too general. 165 // see if this turns out to be too general.
161 166
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
240 "|^nome"; // pt-BR, pt-PT 245 "|^nome"; // pt-BR, pt-PT
241 const char kFirstNameRe[] = 246 const char kFirstNameRe[] =
242 "first.*name|initials|fname|first$|given.*name" 247 "first.*name|initials|fname|first$|given.*name"
243 "|vorname" // de-DE 248 "|vorname" // de-DE
244 "|nombre" // es 249 "|nombre" // es
245 "|forename|prénom|prenom" // fr-FR 250 "|forename|prénom|prenom" // fr-FR
246 "|名" // ja-JP 251 "|名" // ja-JP
247 "|nome" // pt-BR, pt-PT 252 "|nome" // pt-BR, pt-PT
248 "|Имя" // ru 253 "|Имя" // ru
249 "|이름"; // ko-KR 254 "|이름"; // ko-KR
250 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b"; 255 const char kMiddleInitialRe[] =
256 "middle.*initial|m\\.i\\.|mi$|" WORDBREAK "mi" WORDBREAK;
251 const char kMiddleNameRe[] = 257 const char kMiddleNameRe[] =
252 "middle.*name|mname|middle$" 258 "middle.*name|mname|middle$"
253 "|apellido.?materno|lastlastname"; // es 259 "|apellido.?materno|lastlastname"; // es
254 const char kLastNameRe[] = 260 const char kLastNameRe[] =
255 "last.*name|lname|surname|last$|secondname|family.*name" 261 "last.*name|lname|surname|last$|secondname|family.*name"
256 "|nachname" // de-DE 262 "|nachname" // de-DE
257 "|apellido" // es 263 "|apellido" // es
258 "|famille|^nom" // fr-FR 264 "|famille|^nom" // fr-FR
259 "|cognome" // it-IT 265 "|cognome" // it-IT
260 "|姓" // ja-JP 266 "|姓" // ja-JP
(...skipping 25 matching lines...) Expand all
286 "^-$|^\\)$"; 292 "^-$|^\\)$";
287 const char kPhoneSuffixSeparatorRe[] = 293 const char kPhoneSuffixSeparatorRe[] =
288 "^-$"; 294 "^-$";
289 const char kPhonePrefixRe[] = 295 const char kPhonePrefixRe[] =
290 "prefix|exchange" 296 "prefix|exchange"
291 "|preselection" // fr-FR 297 "|preselection" // fr-FR
292 "|ddd"; // pt-BR, pt-PT 298 "|ddd"; // pt-BR, pt-PT
293 const char kPhoneSuffixRe[] = 299 const char kPhoneSuffixRe[] =
294 "suffix"; 300 "suffix";
295 const char kPhoneExtensionRe[] = 301 const char kPhoneExtensionRe[] =
296 "\\bext|ext\\b|extension" 302 WORDBREAK "ext|ext" WORDBREAK "|extension"
297 "|ramal"; // pt-BR, pt-PT 303 "|ramal"; // pt-BR, pt-PT
298 304
299 } // namespace autofill 305 } // namespace autofill
306
307 #undef WORDBREAK
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698