OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
| 6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
| 7 |
| 8 #include <vector> |
| 9 |
| 10 #include "base/values.h" |
| 11 #include "url/gurl.h" |
| 12 |
| 13 namespace dom_distiller { |
| 14 |
| 15 // The distillable page detector is a model trained on a list of numeric |
| 16 // features derived from more complex core features of a webpage (like the |
| 17 // body's .textContent). This function derives those numeric features from |
| 18 // the given core features and returns them as a vector of doubles. |
| 19 // |
| 20 // Note: It is crucial that these features are derived in the same way and are |
| 21 // in the same order as in the training pipeline. See //heuristics/distillable |
| 22 // in the external DomDistillerJs repo. |
| 23 std::vector<double> CalculateDerivedFeatures(bool isOGArticle, |
| 24 const GURL& url, |
| 25 double numElements, |
| 26 double numAnchors, |
| 27 double numForms, |
| 28 const std::string& innerText, |
| 29 const std::string& textContent, |
| 30 const std::string& innerHTML); |
| 31 |
| 32 // Calculates the derived features from the JSON value returned by the |
| 33 // JavaScript core feature extraction. Returns them as a vector of doubles. |
| 34 std::vector<double> CalculateDerivedFeaturesFromJSON(const base::Value* json); |
| 35 |
| 36 } // namespace dom_distiller |
| 37 |
| 38 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_ |
OLD | NEW |