Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(129)

Side by Side Diff: chrome/browser/autofill/form_structure.cc

Issue 12434004: Move remaining Autofill code to //components/autofill. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix long lines Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autofill/form_structure.h"
6
7 #include <utility>
8
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/sha1.h"
14 #include "base/string_util.h"
15 #include "base/stringprintf.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/time.h"
18 #include "base/utf_string_conversions.h"
19 #include "chrome/browser/autofill/autocheckout_page_meta_data.h"
20 #include "chrome/browser/autofill/autofill_metrics.h"
21 #include "chrome/browser/autofill/autofill_type.h"
22 #include "chrome/browser/autofill/autofill_xml_parser.h"
23 #include "chrome/browser/autofill/field_types.h"
24 #include "chrome/browser/autofill/form_field.h"
25 #include "components/autofill/common/form_data.h"
26 #include "components/autofill/common/form_data_predictions.h"
27 #include "components/autofill/common/form_field_data.h"
28 #include "components/autofill/common/form_field_data_predictions.h"
29 #include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
30
31 namespace {
32
// Lowercase name of the POST form method. The FormStructure constructor
// compares the lowercased form method against this value.
33 const char kFormMethodPost[] = "post";
34
35 // XML elements and attributes.
// Attribute names set on the request elements built by
// |EncodeUploadRequest()| and |EncodeQueryRequest()|.
36 const char kAttributeAcceptedFeatures[] = "accepts";
37 const char kAttributeAutofillUsed[] = "autofillused";
// NOTE(review): |kAttributeAutofillType| is not referenced in the visible part
// of this file; presumably used when encoding individual fields -- confirm.
38 const char kAttributeAutofillType[] = "autofilltype";
39 const char kAttributeClientVersion[] = "clientversion";
40 const char kAttributeDataPresent[] = "datapresent";
41 const char kAttributeFormSignature[] = "formsignature";
42 const char kAttributeSignature[] = "signature";
43 const char kAttributeUrlprefixSignature[] = "urlprefixsignature";
// Values of the "accepts" attribute on a query request. The Autocheckout
// variant is used when a queried form has an Autocheckout URL prefix.
44 const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments
45 const char kAcceptedFeaturesAutocheckoutExperiment[] = "a,e"; // a=autocheckout
// Value of the "clientversion" attribute on both upload and query requests.
46 const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
// Prepended to the serialized XML of every request.
47 const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
// Root element names of the query and upload requests, respectively.
48 const char kXMLElementAutofillQuery[] = "autofillquery";
49 const char kXMLElementAutofillUpload[] = "autofillupload";
// Name of the per-form element added to a query request.
50 const char kXMLElementForm[] = "form";
// NOTE(review): |kXMLElementField| is not referenced in the visible part of
// this file; presumably the per-field element name -- confirm.
51 const char kXMLElementField[] = "field";
52
53 // The number of fillable fields necessary for a form to be fillable.
// |RequiredFillableFields()| returns 0 instead of this value when Autocheckout
// is enabled for the form.
54 const size_t kRequiredFillableFields = 3;
55
56 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
57 // |available_field_types| and returns the hex representation as a string.
58 std::string EncodeFieldTypes(const FieldTypeSet& available_field_types) {
59 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
60 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
61 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
62
63 // Pack the types in |available_field_types| into |bit_field|.
64 std::vector<uint8> bit_field(kNumBytes, 0);
65 for (FieldTypeSet::const_iterator field_type = available_field_types.begin();
66 field_type != available_field_types.end();
67 ++field_type) {
68 // Set the appropriate bit in the field. The bit we set is the one
69 // |field_type| % 8 from the left of the byte.
70 const size_t byte = *field_type / 8;
71 const size_t bit = 0x80 >> (*field_type % 8);
72 DCHECK(byte < bit_field.size());
73 bit_field[byte] |= bit;
74 }
75
76 // Discard any trailing zeroes.
77 // If there are no available types, we return the empty string.
78 size_t data_end = bit_field.size();
79 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
80 }
81
82 // Print all meaningfull bytes into a string.
83 std::string data_presence;
84 data_presence.reserve(data_end * 2 + 1);
85 for (size_t i = 0; i < data_end; ++i) {
86 base::StringAppendF(&data_presence, "%02x", bit_field[i]);
87 }
88
89 return data_presence;
90 }
91
// Returns |true| iff |token| is one of the contact type hints recognized here:
// "home", "work" or "mobile" (see the implementation section of
// http://is.gd/whatwg_autocomplete). The comparison is exact and
// case-sensitive. "fax" and "pager" are intentionally not recognized, as
// Chrome does not support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  if (token == "home")
    return true;
  if (token == "work")
    return true;
  return token == "mobile";
}
99
100 // Returns |true| iff the |token| is a type hint appropriate for a field of the
101 // given |field_type|, as specified in the implementation section of
102 // http://is.gd/whatwg_autocomplete
103 bool ContactTypeHintMatchesFieldType(const std::string& token,
104 AutofillFieldType field_type) {
105 // The "home" and "work" type hints are only appropriate for email and phone
106 // number field types.
107 if (token == "home" || token == "work") {
108 return field_type == EMAIL_ADDRESS ||
109 (field_type >= PHONE_HOME_NUMBER &&
110 field_type <= PHONE_HOME_WHOLE_NUMBER);
111 }
112
113 // The "mobile" type hint is only appropriate for phone number field types.
114 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
115 // support filling either type of information.
116 if (token == "mobile") {
117 return field_type >= PHONE_HOME_NUMBER &&
118 field_type <= PHONE_HOME_WHOLE_NUMBER;
119 }
120
121 return false;
122 }
123
124 // Returns the Chrome Autofill-supported field type corresponding to the given
125 // |autocomplete_type|, if there is one, in the context of the given |field|.
126 // Chrome Autofill supports a subset of the field types listed at
127 // http://is.gd/whatwg_autocomplete
128 AutofillFieldType FieldTypeFromAutocompleteType(
129 const std::string& autocomplete_type,
130 const AutofillField& field) {
131 if (autocomplete_type == "name")
132 return NAME_FULL;
133
134 if (autocomplete_type == "given-name")
135 return NAME_FIRST;
136
137 if (autocomplete_type == "additional-name") {
138 if (field.max_length == 1)
139 return NAME_MIDDLE_INITIAL;
140 else
141 return NAME_MIDDLE;
142 }
143
144 if (autocomplete_type == "family-name")
145 return NAME_LAST;
146
147 if (autocomplete_type == "honorific-suffix")
148 return NAME_SUFFIX;
149
150 if (autocomplete_type == "organization")
151 return COMPANY_NAME;
152
153 if (autocomplete_type == "street-address" ||
154 autocomplete_type == "address-line1")
155 return ADDRESS_HOME_LINE1;
156
157 if (autocomplete_type == "address-line2")
158 return ADDRESS_HOME_LINE2;
159
160 if (autocomplete_type == "locality")
161 return ADDRESS_HOME_CITY;
162
163 if (autocomplete_type == "region")
164 return ADDRESS_HOME_STATE;
165
166 if (autocomplete_type == "country")
167 return ADDRESS_HOME_COUNTRY;
168
169 if (autocomplete_type == "postal-code")
170 return ADDRESS_HOME_ZIP;
171
172 if (autocomplete_type == "cc-name")
173 return CREDIT_CARD_NAME;
174
175 if (autocomplete_type == "cc-number")
176 return CREDIT_CARD_NUMBER;
177
178 if (autocomplete_type == "cc-exp") {
179 if (field.max_length == 5)
180 return CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
181 else
182 return CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
183 }
184
185 if (autocomplete_type == "cc-exp-month")
186 return CREDIT_CARD_EXP_MONTH;
187
188 if (autocomplete_type == "cc-exp-year") {
189 if (field.max_length == 2)
190 return CREDIT_CARD_EXP_2_DIGIT_YEAR;
191 else
192 return CREDIT_CARD_EXP_4_DIGIT_YEAR;
193 }
194
195 if (autocomplete_type == "cc-csc")
196 return CREDIT_CARD_VERIFICATION_CODE;
197
198 if (autocomplete_type == "cc-type")
199 return CREDIT_CARD_TYPE;
200
201 if (autocomplete_type == "tel")
202 return PHONE_HOME_WHOLE_NUMBER;
203
204 if (autocomplete_type == "tel-country-code")
205 return PHONE_HOME_COUNTRY_CODE;
206
207 if (autocomplete_type == "tel-national")
208 return PHONE_HOME_CITY_AND_NUMBER;
209
210 if (autocomplete_type == "tel-area-code")
211 return PHONE_HOME_CITY_CODE;
212
213 if (autocomplete_type == "tel-local")
214 return PHONE_HOME_NUMBER;
215
216 if (autocomplete_type == "tel-local-prefix")
217 return PHONE_HOME_NUMBER;
218
219 if (autocomplete_type == "tel-local-suffix")
220 return PHONE_HOME_NUMBER;
221
222 if (autocomplete_type == "email")
223 return EMAIL_ADDRESS;
224
225 return UNKNOWN_TYPE;
226 }
227
228 } // namespace
229
230 FormStructure::FormStructure(const FormData& form,
231 const std::string& autocheckout_url_prefix)
232 : form_name_(form.name),
233 source_url_(form.origin),
234 target_url_(form.action),
235 autofill_count_(0),
236 checkable_field_count_(0),
237 upload_required_(USE_UPLOAD_RATES),
238 server_experiment_id_("no server response"),
239 has_author_specified_types_(false),
240 autocheckout_url_prefix_(autocheckout_url_prefix) {
241 // Copy the form fields.
242 std::map<string16, size_t> unique_names;
243 for (std::vector<FormFieldData>::const_iterator field =
244 form.fields.begin();
245 field != form.fields.end(); field++) {
246
247 // Skipping checkable elements when Autocheckout is not enabled, else
248 // these fields will interfere with existing field signatures with Autofill
249 // servers.
250 if (!field->is_checkable || IsAutocheckoutEnabled()) {
251 // Add all supported form fields (including with empty names) to the
252 // signature. This is a requirement for Autofill servers.
253 form_signature_field_names_.append("&");
254 form_signature_field_names_.append(UTF16ToUTF8(field->name));
255 }
256
257 // Generate a unique name for this field by appending a counter to the name.
258 // Make sure to prepend the counter with a non-numeric digit so that we are
259 // guaranteed to avoid collisions.
260 if (!unique_names.count(field->name))
261 unique_names[field->name] = 1;
262 else
263 ++unique_names[field->name];
264 string16 unique_name = field->name + ASCIIToUTF16("_") +
265 base::IntToString16(unique_names[field->name]);
266 fields_.push_back(new AutofillField(*field, unique_name));
267
268 if (field->is_checkable)
269 ++checkable_field_count_;
270 }
271
272 std::string method = UTF16ToUTF8(form.method);
273 if (StringToLowerASCII(method) == kFormMethodPost) {
274 method_ = POST;
275 } else {
276 // Either the method is 'get', or we don't know. In this case we default
277 // to GET.
278 method_ = GET;
279 }
280 }
281
282 FormStructure::~FormStructure() {}
283
284 void FormStructure::DetermineHeuristicTypes(
285 const AutofillMetrics& metric_logger) {
286 // First, try to detect field types based on each field's |autocomplete|
287 // attribute value. If there is at least one form field that specifies an
288 // autocomplete type hint, don't try to apply other heuristics to match fields
289 // in this form.
290 bool has_author_specified_sections;
291 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
292 &has_author_specified_sections);
293
294 if (!has_author_specified_types_) {
295 FieldTypeMap field_type_map;
296 FormField::ParseFormFields(fields_.get(), &field_type_map);
297 for (size_t index = 0; index < field_count(); index++) {
298 AutofillField* field = fields_[index];
299 FieldTypeMap::iterator iter = field_type_map.find(field->unique_name());
300 if (iter != field_type_map.end())
301 field->set_heuristic_type(iter->second);
302 }
303 }
304
305 UpdateAutofillCount();
306 IdentifySections(has_author_specified_sections);
307
308 if (IsAutofillable(true)) {
309 metric_logger.LogDeveloperEngagementMetric(
310 AutofillMetrics::FILLABLE_FORM_PARSED);
311 if (has_author_specified_types_) {
312 metric_logger.LogDeveloperEngagementMetric(
313 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
314 }
315 }
316 }
317
318 bool FormStructure::EncodeUploadRequest(
319 const FieldTypeSet& available_field_types,
320 bool form_was_autofilled,
321 std::string* encoded_xml) const {
322 if (!ShouldBeCrowdsourced()) {
323 NOTREACHED();
324 return false;
325 }
326
327 // Verify that |available_field_types| agrees with the possible field types we
328 // are uploading.
329 for (std::vector<AutofillField*>::const_iterator field = begin();
330 field != end();
331 ++field) {
332 for (FieldTypeSet::const_iterator type = (*field)->possible_types().begin();
333 type != (*field)->possible_types().end();
334 ++type) {
335 DCHECK(*type == UNKNOWN_TYPE ||
336 *type == EMPTY_TYPE ||
337 available_field_types.count(*type));
338 }
339 }
340
341 // Set up the <autofillupload> element and its attributes.
342 buzz::XmlElement autofill_request_xml(
343 (buzz::QName(kXMLElementAutofillUpload)));
344 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
345 kClientVersion);
346 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
347 FormSignature());
348 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
349 form_was_autofilled ? "true" : "false");
350 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
351 EncodeFieldTypes(available_field_types).c_str());
352
353 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
354 return false; // Malformed form, skip it.
355
356 // Obtain the XML structure as a string.
357 encoded_xml->clear();
358 *encoded_xml = kXMLDeclaration;
359 *encoded_xml += autofill_request_xml.Str().c_str();
360
361 // To enable this logging, run with the flag --vmodule="form_structure=2".
362 VLOG(2) << "\n" << *encoded_xml;
363
364 return true;
365 }
366
367 // static
368 bool FormStructure::EncodeQueryRequest(
369 const std::vector<FormStructure*>& forms,
370 std::vector<std::string>* encoded_signatures,
371 std::string* encoded_xml) {
372 DCHECK(encoded_signatures);
373 DCHECK(encoded_xml);
374 encoded_xml->clear();
375 encoded_signatures->clear();
376 encoded_signatures->reserve(forms.size());
377
378 // Set up the <autofillquery> element and attributes.
379 buzz::XmlElement autofill_request_xml(
380 (buzz::QName(kXMLElementAutofillQuery)));
381 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
382 kClientVersion);
383
384 // autocheckout_url_prefix tells the Autofill server where the forms in the
385 // request came from, and the the Autofill server checks internal status and
386 // decide to enable Autocheckout or not and may return Autocheckout related
387 // data in the response accordingly.
388 // There is no page/frame level object associated with FormStructure that
389 // we could extract URL prefix from. But, all the forms should come from the
390 // same frame, so they should have the same Autocheckout URL prefix. Thus we
391 // use URL prefix from the first form with Autocheckout enabled.
392 std::string autocheckout_url_prefix;
393
394 // Some badly formatted web sites repeat forms - detect that and encode only
395 // one form as returned data would be the same for all the repeated forms.
396 std::set<std::string> processed_forms;
397 for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
398 it != forms.end();
399 ++it) {
400 std::string signature((*it)->FormSignature());
401 if (processed_forms.find(signature) != processed_forms.end())
402 continue;
403 processed_forms.insert(signature);
404 scoped_ptr<buzz::XmlElement> encompassing_xml_element(
405 new buzz::XmlElement(buzz::QName(kXMLElementForm)));
406 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
407 signature);
408
409 if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
410 encompassing_xml_element.get()))
411 continue; // Malformed form, skip it.
412
413 if ((*it)->IsAutocheckoutEnabled()) {
414 if (autocheckout_url_prefix.empty()) {
415 autocheckout_url_prefix = (*it)->autocheckout_url_prefix_;
416 } else {
417 // Making sure all the forms in the request has the same url_prefix.
418 DCHECK_EQ(autocheckout_url_prefix, (*it)->autocheckout_url_prefix_);
419 }
420 }
421
422 autofill_request_xml.AddElement(encompassing_xml_element.release());
423 encoded_signatures->push_back(signature);
424 }
425
426 if (!encoded_signatures->size())
427 return false;
428
429 if (autocheckout_url_prefix.empty()) {
430 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
431 kAcceptedFeaturesExperiment);
432 } else {
433 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
434 kAcceptedFeaturesAutocheckoutExperiment);
435 autofill_request_xml.SetAttr(buzz::QName(kAttributeUrlprefixSignature),
436 Hash64Bit(autocheckout_url_prefix));
437 }
438
439 // Obtain the XML structure as a string.
440 *encoded_xml = kXMLDeclaration;
441 *encoded_xml += autofill_request_xml.Str().c_str();
442
443 return true;
444 }
445
446 // static
447 void FormStructure::ParseQueryResponse(
448 const std::string& response_xml,
449 const std::vector<FormStructure*>& forms,
450 autofill::AutocheckoutPageMetaData* page_meta_data,
451 const AutofillMetrics& metric_logger) {
452 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
453
454 // Parse the field types from the server response to the query.
455 std::vector<AutofillServerFieldInfo> field_infos;
456 UploadRequired upload_required;
457 std::string experiment_id;
458 AutofillQueryXmlParser parse_handler(&field_infos, &upload_required,
459 &experiment_id);
460 buzz::XmlParser parser(&parse_handler);
461 parser.Parse(response_xml.c_str(), response_xml.length(), true);
462 if (!parse_handler.succeeded())
463 return;
464
465 page_meta_data->current_page_number = parse_handler.current_page_number();
466 page_meta_data->total_pages = parse_handler.total_pages();
467 if (parse_handler.proceed_element_descriptor()) {
468 page_meta_data->proceed_element_descriptor.reset(
469 new autofill::WebElementDescriptor(
470 *parse_handler.proceed_element_descriptor()));
471 } else {
472 page_meta_data->proceed_element_descriptor.reset();
473 }
474
475 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
476 metric_logger.LogServerExperimentIdForQuery(experiment_id);
477
478 bool heuristics_detected_fillable_field = false;
479 bool query_response_overrode_heuristics = false;
480
481 // Copy the field types into the actual form.
482 std::vector<AutofillServerFieldInfo>::iterator current_info =
483 field_infos.begin();
484 for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
485 iter != forms.end(); ++iter) {
486 FormStructure* form = *iter;
487 form->upload_required_ = upload_required;
488 form->server_experiment_id_ = experiment_id;
489
490 for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
491 field != form->fields_.end(); ++field, ++current_info) {
492 // In some cases *successful* response does not return all the fields.
493 // Quit the update of the types then.
494 if (current_info == field_infos.end())
495 break;
496
497 // UNKNOWN_TYPE is reserved for use by the client.
498 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
499
500 AutofillFieldType heuristic_type = (*field)->type();
501 if (heuristic_type != UNKNOWN_TYPE)
502 heuristics_detected_fillable_field = true;
503
504 (*field)->set_server_type(current_info->field_type);
505 if (heuristic_type != (*field)->type())
506 query_response_overrode_heuristics = true;
507
508 // Copy default value into the field if available.
509 if (!current_info->default_value.empty())
510 (*field)->set_default_value(current_info->default_value);
511 }
512
513 form->UpdateAutofillCount();
514 form->IdentifySections(false);
515 }
516
517 AutofillMetrics::ServerQueryMetric metric;
518 if (query_response_overrode_heuristics) {
519 if (heuristics_detected_fillable_field) {
520 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
521 } else {
522 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
523 }
524 } else {
525 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
526 }
527 metric_logger.LogServerQueryMetric(metric);
528 }
529
530 // static
531 void FormStructure::GetFieldTypePredictions(
532 const std::vector<FormStructure*>& form_structures,
533 std::vector<FormDataPredictions>* forms) {
534 forms->clear();
535 forms->reserve(form_structures.size());
536 for (size_t i = 0; i < form_structures.size(); ++i) {
537 FormStructure* form_structure = form_structures[i];
538 FormDataPredictions form;
539 form.data.name = form_structure->form_name_;
540 form.data.method =
541 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
542 form.data.origin = form_structure->source_url_;
543 form.data.action = form_structure->target_url_;
544 form.signature = form_structure->FormSignature();
545 form.experiment_id = form_structure->server_experiment_id_;
546
547 for (std::vector<AutofillField*>::const_iterator field =
548 form_structure->fields_.begin();
549 field != form_structure->fields_.end(); ++field) {
550 form.data.fields.push_back(FormFieldData(**field));
551
552 FormFieldDataPredictions annotated_field;
553 annotated_field.signature = (*field)->FieldSignature();
554 annotated_field.heuristic_type =
555 AutofillType::FieldTypeToString((*field)->heuristic_type());
556 annotated_field.server_type =
557 AutofillType::FieldTypeToString((*field)->server_type());
558 annotated_field.overall_type =
559 AutofillType::FieldTypeToString((*field)->type());
560 form.fields.push_back(annotated_field);
561 }
562
563 forms->push_back(form);
564 }
565 }
566
567 std::string FormStructure::FormSignature() const {
568 std::string scheme(target_url_.scheme());
569 std::string host(target_url_.host());
570
571 // If target host or scheme is empty, set scheme and host of source url.
572 // This is done to match the Toolbar's behavior.
573 if (scheme.empty() || host.empty()) {
574 scheme = source_url_.scheme();
575 host = source_url_.host();
576 }
577
578 std::string form_string = scheme + "://" + host + "&" +
579 UTF16ToUTF8(form_name_) +
580 form_signature_field_names_;
581
582 return Hash64Bit(form_string);
583 }
584
585 bool FormStructure::IsAutocheckoutEnabled() const {
586 return !autocheckout_url_prefix_.empty();
587 }
588
589 size_t FormStructure::RequiredFillableFields() const {
590 return IsAutocheckoutEnabled() ? 0 : kRequiredFillableFields;
591 }
592
593 bool FormStructure::IsAutofillable(bool require_method_post) const {
594 if (autofill_count() < RequiredFillableFields())
595 return false;
596
597 return ShouldBeParsed(require_method_post);
598 }
599
600 void FormStructure::UpdateAutofillCount() {
601 autofill_count_ = 0;
602 for (std::vector<AutofillField*>::const_iterator iter = begin();
603 iter != end(); ++iter) {
604 AutofillField* field = *iter;
605 if (field && field->IsFieldFillable())
606 ++autofill_count_;
607 }
608 }
609
610 bool FormStructure::ShouldBeParsed(bool require_method_post) const {
611 // Ignore counting checkable elements towards minimum number of elements
612 // required to parse. This avoids trying to crowdsource forms with few text
613 // or select elements.
614 if ((field_count() - checkable_field_count()) < RequiredFillableFields())
615 return false;
616
617 // Rule out http(s)://*/search?...
618 // e.g. http://www.google.com/search?q=...
619 // http://search.yahoo.com/search?p=...
620 if (target_url_.path() == "/search")
621 return false;
622
623 if (!IsAutocheckoutEnabled()) {
624 // Make sure there is at least one text field when Autocheckout is
625 // not enabled.
626 bool has_text_field = false;
627 for (std::vector<AutofillField*>::const_iterator it = begin();
628 it != end() && !has_text_field; ++it) {
629 has_text_field |= (*it)->form_control_type != "select-one";
630 }
631 if (!has_text_field)
632 return false;
633 }
634
635 return !require_method_post || (method_ == POST);
636 }
637
638 bool FormStructure::ShouldBeCrowdsourced() const {
639 return !has_author_specified_types_ && ShouldBeParsed(true);
640 }
641
642 void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
643 // Map from field signatures to cached fields.
644 std::map<std::string, const AutofillField*> cached_fields;
645 for (size_t i = 0; i < cached_form.field_count(); ++i) {
646 const AutofillField* field = cached_form.field(i);
647 cached_fields[field->FieldSignature()] = field;
648 }
649
650 for (std::vector<AutofillField*>::const_iterator iter = begin();
651 iter != end(); ++iter) {
652 AutofillField* field = *iter;
653
654 std::map<std::string, const AutofillField*>::const_iterator
655 cached_field = cached_fields.find(field->FieldSignature());
656 if (cached_field != cached_fields.end()) {
657 if (field->form_control_type != "select-one" &&
658 field->value == cached_field->second->value) {
659 // From the perspective of learning user data, text fields containing
660 // default values are equivalent to empty fields.
661 field->value = string16();
662 }
663
664 field->set_heuristic_type(cached_field->second->heuristic_type());
665 field->set_server_type(cached_field->second->server_type());
666 }
667 }
668
669 UpdateAutofillCount();
670
671 server_experiment_id_ = cached_form.server_experiment_id();
672
673 // The form signature should match between query and upload requests to the
674 // server. On many websites, form elements are dynamically added, removed, or
675 // rearranged via JavaScript between page load and form submission, so we
676 // copy over the |form_signature_field_names_| corresponding to the query
677 // request.
678 DCHECK_EQ(cached_form.form_name_, form_name_);
679 DCHECK_EQ(cached_form.source_url_, source_url_);
680 DCHECK_EQ(cached_form.target_url_, target_url_);
681 form_signature_field_names_ = cached_form.form_signature_field_names_;
682 }
683
684 void FormStructure::LogQualityMetrics(
685 const AutofillMetrics& metric_logger,
686 const base::TimeTicks& load_time,
687 const base::TimeTicks& interaction_time,
688 const base::TimeTicks& submission_time) const {
689 std::string experiment_id = server_experiment_id();
690 metric_logger.LogServerExperimentIdForUpload(experiment_id);
691
692 size_t num_detected_field_types = 0;
693 bool did_autofill_all_possible_fields = true;
694 bool did_autofill_some_possible_fields = false;
695 for (size_t i = 0; i < field_count(); ++i) {
696 const AutofillField* field = this->field(i);
697 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
698 experiment_id);
699
700 // No further logging for empty fields nor for fields where the entered data
701 // does not appear to already exist in the user's stored Autofill data.
702 const FieldTypeSet& field_types = field->possible_types();
703 DCHECK(!field_types.empty());
704 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
705 continue;
706
707 ++num_detected_field_types;
708 if (field->is_autofilled)
709 did_autofill_some_possible_fields = true;
710 else
711 did_autofill_all_possible_fields = false;
712
713 // Collapse field types that Chrome treats as identical, e.g. home and
714 // billing address fields.
715 FieldTypeSet collapsed_field_types;
716 for (FieldTypeSet::const_iterator it = field_types.begin();
717 it != field_types.end();
718 ++it) {
719 // Since we currently only support US phone numbers, the (city code + main
720 // digits) number is almost always identical to the whole phone number.
721 // TODO(isherman): Improve this logic once we add support for
722 // international numbers.
723 if (*it == PHONE_HOME_CITY_AND_NUMBER)
724 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
725 else
726 collapsed_field_types.insert(AutofillType::GetEquivalentFieldType(*it));
727 }
728
729 // Capture the field's type, if it is unambiguous.
730 AutofillFieldType field_type = UNKNOWN_TYPE;
731 if (collapsed_field_types.size() == 1)
732 field_type = *collapsed_field_types.begin();
733
734 AutofillFieldType heuristic_type = field->heuristic_type();
735 AutofillFieldType server_type = field->server_type();
736 AutofillFieldType predicted_type = field->type();
737
738 // Log heuristic, server, and overall type quality metrics, independently of
739 // whether the field was autofilled.
740 if (heuristic_type == UNKNOWN_TYPE) {
741 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
742 field_type, experiment_id);
743 } else if (field_types.count(heuristic_type)) {
744 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
745 field_type, experiment_id);
746 } else {
747 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
748 field_type, experiment_id);
749 }
750
751 if (server_type == NO_SERVER_DATA) {
752 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
753 field_type, experiment_id);
754 } else if (field_types.count(server_type)) {
755 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
756 field_type, experiment_id);
757 } else {
758 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
759 field_type, experiment_id);
760 }
761
762 if (predicted_type == UNKNOWN_TYPE) {
763 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
764 field_type, experiment_id);
765 } else if (field_types.count(predicted_type)) {
766 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
767 field_type, experiment_id);
768 } else {
769 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
770 field_type, experiment_id);
771 }
772
773 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
774 // have to skip them for the remaining metrics.
775 if (field->form_control_type == "select-one")
776 continue;
777
778 if (field->is_autofilled) {
779 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
780 experiment_id);
781 } else {
782 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
783 experiment_id);
784
785 if (heuristic_type == UNKNOWN_TYPE) {
786 metric_logger.LogQualityMetric(
787 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
788 experiment_id);
789 } else if (field_types.count(heuristic_type)) {
790 metric_logger.LogQualityMetric(
791 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
792 experiment_id);
793 } else {
794 metric_logger.LogQualityMetric(
795 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
796 experiment_id);
797 }
798
799 if (server_type == NO_SERVER_DATA) {
800 metric_logger.LogQualityMetric(
801 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
802 experiment_id);
803 } else if (field_types.count(server_type)) {
804 metric_logger.LogQualityMetric(
805 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
806 experiment_id);
807 } else {
808 metric_logger.LogQualityMetric(
809 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
810 experiment_id);
811 }
812 }
813 }
814
815 if (num_detected_field_types < RequiredFillableFields()) {
816 metric_logger.LogUserHappinessMetric(
817 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
818 } else {
819 if (did_autofill_all_possible_fields) {
820 metric_logger.LogUserHappinessMetric(
821 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
822 } else if (did_autofill_some_possible_fields) {
823 metric_logger.LogUserHappinessMetric(
824 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
825 } else {
826 metric_logger.LogUserHappinessMetric(
827 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
828 }
829
830 // Unlike the other times, the |submission_time| should always be available.
831 DCHECK(!submission_time.is_null());
832
833 // The |load_time| might be unset, in the case that the form was dynamically
834 // added to the DOM.
835 if (!load_time.is_null()) {
836 // Submission should always chronologically follow form load.
837 DCHECK(submission_time > load_time);
838 base::TimeDelta elapsed = submission_time - load_time;
839 if (did_autofill_some_possible_fields)
840 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
841 else
842 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
843 }
844
845 // The |interaction_time| might be unset, in the case that the user
846 // submitted a blank form.
847 if (!interaction_time.is_null()) {
848 // Submission should always chronologically follow interaction.
849 DCHECK(submission_time > interaction_time);
850 base::TimeDelta elapsed = submission_time - interaction_time;
851 if (did_autofill_some_possible_fields) {
852 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
853 } else {
854 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
855 elapsed);
856 }
857 }
858 }
859 }
860
861 const AutofillField* FormStructure::field(size_t index) const {
862 if (index >= fields_.size()) {
863 NOTREACHED();
864 return NULL;
865 }
866
867 return fields_[index];
868 }
869
870 AutofillField* FormStructure::field(size_t index) {
871 return const_cast<AutofillField*>(
872 static_cast<const FormStructure*>(this)->field(index));
873 }
874
// Returns the number of entries currently held in |fields_|.
875 size_t FormStructure::field_count() const {
876 return fields_.size();
877 }
878
// Returns the stored |checkable_field_count_| value.
879 size_t FormStructure::checkable_field_count() const {
880 return checkable_field_count_;
881 }
882
// Returns a copy of the stored |server_experiment_id_| string.
883 std::string FormStructure::server_experiment_id() const {
884 return server_experiment_id_;
885 }
886
887 FormData FormStructure::ToFormData() const {
888 // |data.user_submitted| will always be false.
889 FormData data;
890 data.name = form_name_;
891 data.origin = source_url_;
892 data.action = target_url_;
893 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
894
895 for (size_t i = 0; i < fields_.size(); ++i) {
896 data.fields.push_back(FormFieldData(*fields_[i]));
897 }
898
899 return data;
900 }
901
902 bool FormStructure::operator==(const FormData& form) const {
903 // TODO(jhawkins): Is this enough to differentiate a form?
904 if (form_name_ == form.name &&
905 source_url_ == form.origin &&
906 target_url_ == form.action) {
907 return true;
908 }
909
910 // TODO(jhawkins): Compare field names, IDs and labels once we have labels
911 // set up.
912
913 return false;
914 }
915
916 bool FormStructure::operator!=(const FormData& form) const {
917 return !operator==(form);
918 }
919
920 std::string FormStructure::Hash64Bit(const std::string& str) {
921 std::string hash_bin = base::SHA1HashString(str);
922 DCHECK_EQ(20U, hash_bin.length());
923
924 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
925 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
926 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
927 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
928 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
929 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
930 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
931 ((static_cast<uint64>(hash_bin[7])) & 0xFF);
932
933 return base::Uint64ToString(hash64);
934 }
935
936 bool FormStructure::EncodeFormRequest(
937 FormStructure::EncodeRequestType request_type,
938 buzz::XmlElement* encompassing_xml_element) const {
939 if (!field_count()) // Nothing to add.
940 return false;
941
942 // Some badly formatted web sites repeat fields - limit number of fields to
943 // 48, which is far larger than any valid form and XML still fits into 2K.
944 // Do not send requests for forms with more than this many fields, as they are
945 // near certainly not valid/auto-fillable.
946 const size_t kMaxFieldsOnTheForm = 48;
947 if (field_count() > kMaxFieldsOnTheForm)
948 return false;
949
950 // Add the child nodes for the form fields.
951 for (size_t index = 0; index < field_count(); ++index) {
952 const AutofillField* field = fields_[index];
953 if (request_type == FormStructure::UPLOAD) {
954 // Don't upload checkable fields.
955 if (field->is_checkable)
956 continue;
957
958 FieldTypeSet types = field->possible_types();
959 // |types| could be empty in unit-tests only.
960 for (FieldTypeSet::iterator field_type = types.begin();
961 field_type != types.end(); ++field_type) {
962 buzz::XmlElement *field_element = new buzz::XmlElement(
963 buzz::QName(kXMLElementField));
964
965 field_element->SetAttr(buzz::QName(kAttributeSignature),
966 field->FieldSignature());
967 field_element->SetAttr(buzz::QName(kAttributeAutofillType),
968 base::IntToString(*field_type));
969 encompassing_xml_element->AddElement(field_element);
970 }
971 } else {
972 // Skip putting checkable fields in the request if Autocheckout is not
973 // enabled.
974 if (field->is_checkable && !IsAutocheckoutEnabled())
975 continue;
976
977 buzz::XmlElement *field_element = new buzz::XmlElement(
978 buzz::QName(kXMLElementField));
979 field_element->SetAttr(buzz::QName(kAttributeSignature),
980 field->FieldSignature());
981 encompassing_xml_element->AddElement(field_element);
982 }
983 }
984 return true;
985 }
986
// Derives a heuristic type and a section name for every field from its
// 'autocomplete' attribute.
//
// Outputs:
//   |*found_types|    - set to true if at least one field carries an
//                       attribute value other than empty, "on" or "off",
//                       even when that value later fails to parse.
//   |*found_sections| - set to true if at least one field was assigned a
//                       section other than the default "-default".
//
// Every field's section is first reset to "-default". The attribute is then
// canonicalized and split on spaces, and the tokens are consumed from the
// back. Read left to right, the accepted shape is:
//   [section-<name>] [shipping|billing] [<contact type hint>] <field type>
// Any leftover token, an unknown field type, or a contact type hint that does
// not match the field type makes the loop skip the field, leaving it in the
// default section with its heuristic type untouched.
987 void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
988 bool* found_types,
989 bool* found_sections) {
990 const std::string kDefaultSection = "-default";
991
992 *found_types = false;
993 *found_sections = false;
994 for (std::vector<AutofillField*>::iterator it = fields_.begin();
995 it != fields_.end(); ++it) {
996 AutofillField* field = *it;
997
998 // To prevent potential section name collisions, add a default suffix for
999 // other fields. Without this, 'autocomplete' attribute values
1000 // "section--shipping street-address" and "shipping street-address" would be
1001 // parsed identically, given the section handling code below. We do this
1002 // before any validation so that fields with invalid attributes still end up
1003 // in the default section. These default section names will be overridden
1004 // by subsequent heuristic parsing steps if there are no author-specified
1005 // section names.
1006 field->set_section(kDefaultSection);
1007
1008 // Canonicalize the attribute value by trimming whitespace, collapsing
1009 // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1010 std::string autocomplete_attribute =
1011 CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1012 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1013
1014 // The autocomplete attribute is overloaded: it can specify either a field
1015 // type hint or whether autocomplete should be enabled at all. Ignore the
1016 // latter type of attribute value.
1017 if (autocomplete_attribute.empty() ||
1018 autocomplete_attribute == "on" ||
1019 autocomplete_attribute == "off") {
1020 continue;
1021 }
1022
1023 // Any other value, even if it is invalid, is considered to be a type hint.
1024 // This allows a website's author to specify an attribute like
1025 // autocomplete="other" on a field to disable all Autofill heuristics for
1026 // the form.
1027 *found_types = true;
1028
1029 // Tokenize the attribute value. Per the spec, the tokens are parsed in
1030 // reverse order.
1031 std::vector<std::string> tokens;
1032 Tokenize(autocomplete_attribute, " ", &tokens);
1033
1034 // The final token must be the field type.
1035 // If it is not one of the known types, abort.
1036 DCHECK(!tokens.empty());
1037 std::string field_type_token = tokens.back();
1038 tokens.pop_back();
1039 AutofillFieldType field_type =
1040 FieldTypeFromAutocompleteType(field_type_token, *field);
1041 if (field_type == UNKNOWN_TYPE)
1042 continue;
1043
1044 // The preceding token, if any, may be a type hint.
1045 if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1046 // If it is, it must match the field type; otherwise, abort.
1047 // Note that an invalid token invalidates the entire attribute value, even
1048 // if the other tokens are valid.
1049 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1050 continue;
1051
1052 // Chrome Autofill ignores these type hints.
1053 tokens.pop_back();
1054 }
1055
1056 // The preceding token, if any, may be a fixed string that is either
1057 // "shipping" or "billing". Chrome Autofill treats these as implicit
1058 // section name suffixes.
1059 DCHECK_EQ(kDefaultSection, field->section());
1060 std::string section = field->section();
1061 if (!tokens.empty() &&
1062 (tokens.back() == "shipping" || tokens.back() == "billing")) {
1063 section = "-" + tokens.back();
1064 tokens.pop_back();
1065 }
1066
1067 // The preceding token, if any, may be a named section.
1068 const std::string kSectionPrefix = "section-";
1069 if (!tokens.empty() &&
1070 StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1071 // Prepend this section name to the suffix set in the preceding block.
1072 section = tokens.back().substr(kSectionPrefix.size()) + section;
1073 tokens.pop_back();
1074 }
1075
1076 // No other tokens are allowed. If there are any remaining, abort.
1077 if (!tokens.empty())
1078 continue;
1079
1080 if (section != kDefaultSection) {
1081 *found_sections = true;
1082 field->set_section(section);
1083 }
1084
1085 // No errors encountered while parsing!
1086 // Update the |field|'s type based on what was parsed from the attribute.
1087 field->set_heuristic_type(field_type);
1088 if (field_type_token == "tel-local-prefix")
1089 field->set_phone_part(AutofillField::PHONE_PREFIX);
1090 else if (field_type_token == "tel-local-suffix")
1091 field->set_phone_part(AutofillField::PHONE_SUFFIX);
1092 }
1093 }
1094
1095 void FormStructure::IdentifySections(bool has_author_specified_sections) {
1096 if (fields_.empty())
1097 return;
1098
1099 if (!has_author_specified_sections) {
1100 // Name sections after the first field in the section.
1101 string16 current_section = fields_.front()->unique_name();
1102
1103 // Keep track of the types we've seen in this section.
1104 std::set<AutofillFieldType> seen_types;
1105 AutofillFieldType previous_type = UNKNOWN_TYPE;
1106
1107 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1108 field != fields_.end(); ++field) {
1109 const AutofillFieldType current_type =
1110 AutofillType::GetEquivalentFieldType((*field)->type());
1111
1112 bool already_saw_current_type = seen_types.count(current_type) > 0;
1113
1114 // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1115 // evening phone number. Our phone number detection is also generally a
1116 // little off. Hence, ignore this field type as a signal here.
1117 if (AutofillType(current_type).group() == AutofillType::PHONE)
1118 already_saw_current_type = false;
1119
1120 // Some forms have adjacent fields of the same type. Two common examples:
1121 // * Forms with two email fields, where the second is meant to "confirm"
1122 // the first.
1123 // * Forms with a <select> menu for states in some countries, and a
1124 // freeform <input> field for states in other countries. (Usually,
1125 // only one of these two will be visible for any given choice of
1126 // country.)
1127 // Generally, adjacent fields of the same type belong in the same logical
1128 // section.
1129 if (current_type == previous_type)
1130 already_saw_current_type = false;
1131
1132 previous_type = current_type;
1133
1134 if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1135 // We reached the end of a section, so start a new section.
1136 seen_types.clear();
1137 current_section = (*field)->unique_name();
1138 }
1139
1140 seen_types.insert(current_type);
1141 (*field)->set_section(UTF16ToUTF8(current_section));
1142 }
1143 }
1144
1145 // Ensure that credit card and address fields are in separate sections.
1146 // This simplifies the section-aware logic in autofill_manager.cc.
1147 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1148 field != fields_.end(); ++field) {
1149 AutofillType::FieldTypeGroup field_type_group =
1150 AutofillType((*field)->type()).group();
1151 if (field_type_group == AutofillType::CREDIT_CARD)
1152 (*field)->set_section((*field)->section() + "-cc");
1153 else
1154 (*field)->set_section((*field)->section() + "-default");
1155 }
1156 }
OLDNEW
« no previous file with comments | « chrome/browser/autofill/form_structure.h ('k') | chrome/browser/autofill/form_structure_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698