OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/dom_distiller/core/page_features.h" |
| 6 |
| 7 #include <string> |
| 8 #include <vector> |
| 9 |
| 10 #include "base/files/file_util.h" |
| 11 #include "base/json/json_reader.h" |
| 12 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/path_service.h" |
| 14 #include "testing/gtest/include/gtest/gtest.h" |
| 15 |
| 16 namespace dom_distiller { |
| 17 |
| 18 // This test uses input data of core features and the output of the training |
| 19 // pipeline's derived feature extraction to ensure that the extraction that is |
| 20 // done in Chromium matches that in the training pipeline. |
| 21 TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) { |
| 22 base::FilePath dir_source_root; |
| 23 EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root)); |
| 24 std::string input_data; |
| 25 ASSERT_TRUE(base::ReadFileToString( |
| 26 dir_source_root.AppendASCII( |
| 27 "components/test/data/dom_distiller/core_features.json"), |
| 28 &input_data)); |
| 29 std::string expected_output_data; |
| 30 // This file contains the output from the calculation of derived features in |
| 31 // the training pipeline. |
| 32 ASSERT_TRUE(base::ReadFileToString( |
| 33 dir_source_root.AppendASCII( |
| 34 "components/test/data/dom_distiller/derived_features.json"), |
| 35 &expected_output_data)); |
| 36 |
| 37 scoped_ptr<base::Value> input_json(base::JSONReader::Read(input_data)); |
| 38 ASSERT_TRUE(input_json); |
| 39 |
| 40 scoped_ptr<base::Value> expected_output_json( |
| 41 base::JSONReader::Read(expected_output_data)); |
| 42 ASSERT_TRUE(expected_output_json); |
| 43 |
| 44 base::ListValue* input_entries; |
| 45 ASSERT_TRUE(input_json->GetAsList(&input_entries)); |
| 46 ASSERT_GT(input_entries->GetSize(), 0u); |
| 47 |
| 48 base::ListValue* expected_output_entries; |
| 49 ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries)); |
| 50 ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize()); |
| 51 |
| 52 // In the output, the features list is a sequence of labels followed by values |
| 53 // (so labels at even indices, values at odd indices). |
| 54 base::DictionaryValue* entry; |
| 55 base::ListValue* derived_features; |
| 56 ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry)); |
| 57 ASSERT_TRUE(entry->GetList("features", &derived_features)); |
| 58 std::vector<std::string> labels; |
| 59 for (size_t i = 0; i < derived_features->GetSize(); i += 2) { |
| 60 std::string label; |
| 61 ASSERT_TRUE(derived_features->GetString(i, &label)); |
| 62 labels.push_back(label); |
| 63 } |
| 64 |
| 65 for (size_t i = 0; i < input_entries->GetSize(); ++i) { |
| 66 base::DictionaryValue* core_features; |
| 67 ASSERT_TRUE(input_entries->GetDictionary(i, &entry)); |
| 68 ASSERT_TRUE(entry->GetDictionary("features", &core_features)); |
| 69 std::vector<double> derived( |
| 70 CalculateDerivedFeaturesFromJSON(core_features)); |
| 71 |
| 72 ASSERT_EQ(labels.size(), derived.size()); |
| 73 ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry)); |
| 74 ASSERT_TRUE(entry->GetList("features", &derived_features)); |
| 75 std::string entry_url; |
| 76 ASSERT_TRUE(entry->GetString("url", &entry_url)); |
| 77 for (size_t j = 0, value_index = 1; j < derived.size(); |
| 78 ++j, value_index += 2) { |
| 79 double expected_value; |
| 80 if (!derived_features->GetDouble(value_index, &expected_value)) { |
| 81 bool bool_value; |
| 82 ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value)); |
| 83 expected_value = bool_value ? 1.0 : 0.0; |
| 84 } |
| 85 EXPECT_DOUBLE_EQ(derived[j], expected_value) |
| 86 << "incorrect value for entry with url " << entry_url |
| 87 << " for derived feature " << labels[j]; |
| 88 } |
| 89 } |
| 90 } |
| 91 } |
OLD | NEW |