Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Unified Diff: components/dom_distiller/core/page_features_unittest.cc

Issue 1042053003: Add calculation of derived features for distillable page model (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/dom_distiller/core/page_features.cc ('k') | components/test/data/dom_distiller/OWNERS » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/dom_distiller/core/page_features_unittest.cc
diff --git a/components/dom_distiller/core/page_features_unittest.cc b/components/dom_distiller/core/page_features_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a863afc5631b9edef038782866ee501567cfba54
--- /dev/null
+++ b/components/dom_distiller/core/page_features_unittest.cc
@@ -0,0 +1,91 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/dom_distiller/core/page_features.h"
+
+#include <string>
+#include <vector>
+
+#include "base/files/file_util.h"
+#include "base/json/json_reader.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/path_service.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace dom_distiller {
+
+// Checks Chromium's derived-feature calculation against reference data: for
+// every entry of core features in core_features.json, the computed values must
+// equal those that the training pipeline wrote to derived_features.json.
+TEST(DomDistillerPageFeaturesTest, TestCalculateDerivedFeatures) {
+  // Fatal on failure: without the source root neither fixture can be located.
+  base::FilePath dir_source_root;
+  ASSERT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &dir_source_root));
+  std::string input_data;
+  ASSERT_TRUE(base::ReadFileToString(
+      dir_source_root.AppendASCII(
+          "components/test/data/dom_distiller/core_features.json"),
+      &input_data));
+  std::string expected_output_data;
+  // This file contains the output from the calculation of derived features in
+  // the training pipeline.
+  ASSERT_TRUE(base::ReadFileToString(
+      dir_source_root.AppendASCII(
+          "components/test/data/dom_distiller/derived_features.json"),
+      &expected_output_data));
+
+  scoped_ptr<base::Value> input_json(base::JSONReader::Read(input_data));
+  ASSERT_TRUE(input_json);
+  scoped_ptr<base::Value> expected_output_json(
+      base::JSONReader::Read(expected_output_data));
+  ASSERT_TRUE(expected_output_json);
+
+  // Both files must hold a top-level list; entries are paired up by index.
+  base::ListValue* input_entries;
+  ASSERT_TRUE(input_json->GetAsList(&input_entries));
+  ASSERT_GT(input_entries->GetSize(), 0u);
+  base::ListValue* expected_output_entries;
+  ASSERT_TRUE(expected_output_json->GetAsList(&expected_output_entries));
+  ASSERT_EQ(expected_output_entries->GetSize(), input_entries->GetSize());
+
+  // In the output, each features list alternates labels (even indices) and
+  // values (odd indices). The labels are read once, from the first entry.
+  base::DictionaryValue* entry;
+  base::ListValue* derived_features;
+  ASSERT_TRUE(expected_output_entries->GetDictionary(0, &entry));
+  ASSERT_TRUE(entry->GetList("features", &derived_features));
+  std::vector<std::string> labels;
+  for (size_t i = 0; i < derived_features->GetSize(); i += 2) {
+    std::string label;
+    ASSERT_TRUE(derived_features->GetString(i, &label));
+    labels.push_back(label);
+  }
+
+  for (size_t i = 0; i < input_entries->GetSize(); ++i) {
+    base::DictionaryValue* core_features;
+    ASSERT_TRUE(input_entries->GetDictionary(i, &entry));
+    ASSERT_TRUE(entry->GetDictionary("features", &core_features));
+    std::vector<double> derived(
+        CalculateDerivedFeaturesFromJSON(core_features));
+    ASSERT_EQ(labels.size(), derived.size());
+    ASSERT_TRUE(expected_output_entries->GetDictionary(i, &entry));
+    ASSERT_TRUE(entry->GetList("features", &derived_features));
+    std::string entry_url;
+    ASSERT_TRUE(entry->GetString("url", &entry_url));
+    for (size_t j = 0, value_index = 1; j < derived.size();
+         ++j, value_index += 2) {
+      double expected_value;
+      if (!derived_features->GetDouble(value_index, &expected_value)) {
+        // Not a number: expect a JSON boolean and compare it as 1.0 / 0.0.
+        bool bool_value;
+        ASSERT_TRUE(derived_features->GetBoolean(value_index, &bool_value));
+        expected_value = bool_value ? 1.0 : 0.0;
+      }
+      EXPECT_DOUBLE_EQ(expected_value, derived[j])
+          << "incorrect value for entry with url " << entry_url
+          << " for derived feature " << labels[j];
+    }
+  }
+}
+}  // namespace dom_distiller
« no previous file with comments | « components/dom_distiller/core/page_features.cc ('k') | components/test/data/dom_distiller/OWNERS » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698