OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <map> | |
9 #include <string> | 8 #include <string> |
9 #include <vector> | |
10 | 10 |
11 #include "base/callback.h" | 11 #include "base/callback.h" |
12 #include "base/gtest_prod_util.h" | 12 #include "base/containers/hash_tables.h" |
13 #include "base/memory/ref_counted.h" | 13 #include "base/memory/scoped_ptr.h" |
14 #include "base/values.h" | |
15 #include "components/dom_distiller/core/distiller_page.h" | |
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 14 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 15 #include "components/dom_distiller/core/page_distiller.h" |
16 #include "components/dom_distiller/core/proto/distilled_article.pb.h" | |
18 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
19 #include "url/gurl.h" | 18 #include "url/gurl.h" |
20 | 19 |
21 namespace dom_distiller { | 20 namespace dom_distiller { |
22 | 21 |
23 class DistillerImpl; | 22 class DistillerImpl; |
24 | 23 |
25 class Distiller { | 24 class Distiller { |
26 public: | 25 public: |
27 typedef base::Callback<void( | 26 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)> |
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; | 27 DistillerCallback; |
29 virtual ~Distiller() {} | 28 virtual ~Distiller() {} |
30 | 29 |
31 // Distills a page, and asynchronously returns the article HTML to the | 30 // Distills a page, and asynchronously returns the article HTML to the |
32 // supplied callback. | 31 // supplied callback. |
33 virtual void DistillPage(const GURL& url, | 32 virtual void DistillPage(const GURL& url, |
34 const DistillerCallback& callback) = 0; | 33 const DistillerCallback& callback) = 0; |
35 }; | 34 }; |
36 | 35 |
37 class DistillerFactory { | 36 class DistillerFactory { |
38 public: | 37 public: |
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; | 38 virtual scoped_ptr<Distiller> CreateDistiller() = 0; |
40 virtual ~DistillerFactory() {} | 39 virtual ~DistillerFactory() {} |
41 }; | 40 }; |
42 | 41 |
43 // Factory for creating a Distiller. | 42 // Factory for creating a Distiller. |
44 class DistillerFactoryImpl : public DistillerFactory { | 43 class DistillerFactoryImpl : public DistillerFactory { |
45 public: | 44 public: |
46 DistillerFactoryImpl( | 45 DistillerFactoryImpl( |
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 46 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); | 47 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); |
49 virtual ~DistillerFactoryImpl(); | 48 virtual ~DistillerFactoryImpl(); |
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; | 49 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; |
51 | 50 |
52 private: | 51 private: |
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; | 52 scoped_ptr<DistillerPageFactory> distiller_page_factory_; |
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; | 53 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; |
55 }; | 54 }; |
56 | 55 |
57 // Distills an article from a page and associated pages. | 56 // Distills an article from a page and associated pages. |
58 class DistillerImpl : public Distiller, | 57 class DistillerImpl : public Distiller { |
59 public DistillerPage::Delegate { | |
60 public: | 58 public: |
61 DistillerImpl( | 59 DistillerImpl( |
62 const DistillerPageFactory& distiller_page_factory, | 60 const DistillerPageFactory& distiller_page_factory, |
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
64 virtual ~DistillerImpl(); | 62 virtual ~DistillerImpl(); |
65 | 63 |
66 // Creates an execution context. This must be called once before any calls are | 64 // Creates an execution context. This must be called once before any calls are |
67 // made to distill the page. | 65 // made to distill the page. |
68 virtual void Init(); | 66 virtual void Init(); |
69 | 67 |
70 virtual void DistillPage(const GURL& url, | 68 virtual void DistillPage(const GURL& url, |
71 const DistillerCallback& callback) OVERRIDE; | 69 const DistillerCallback& callback) OVERRIDE; |
72 | 70 |
73 // PageDistillerContext::Delegate | 71 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
cjhopman
2014/02/03 23:56:53
nit: should be private
shashi
2014/02/04 01:39:37
Done.
| |
74 virtual void OnLoadURLDone() OVERRIDE; | 72 const std::string& id, |
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; | 73 const std::string& response); |
76 | 74 |
77 void OnFetchImageDone(const std::string& id, const std::string& response); | 75 void OnPageDistillationFinished(const GURL& page_url, |
cjhopman
2014/02/03 23:56:53
nit: should be private
shashi
2014/02/04 01:39:37
Done.
| |
76 const DistilledPageInfo& distilled_page, | |
77 bool distillation_successful); | |
78 | 78 |
79 private: | 79 private: |
80 virtual void LoadURL(const GURL& url); | 80 virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
81 virtual void FetchImage(const std::string& image_id, const std::string& item); | 81 const std::string& image_id, |
82 const std::string& item); | |
82 | 83 |
83 // Injects JavaScript to distill a loaded page down to its important content, | 84 // Distills the page and adds the new page to |article_proto_|. |
84 // e.g., extracting a news article from its surrounding boilerplate. | 85 void DistillPage(const GURL& url); |
85 void GetDistilledContent(); | |
86 | 86 |
87 const DistillerPageFactory& distiller_page_factory_; | 87 // Checks if all distillation callbacks are finished and runs the |
cjhopman
2014/02/03 23:56:53
nit: This reads a little awkward, how about "Runs
shashi
2014/02/04 01:39:37
Done.
| |
88 // |distillation_cb_| if callbacks are finished. | |
89 void RunDistillerCallbackIfDone(); | |
90 | |
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 91 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
89 scoped_ptr<DistillerPage> distiller_page_; | 92 scoped_ptr<PageDistiller> page_distiller_; |
90 DistillerCallback distillation_cb_; | 93 DistillerCallback distillation_cb_; |
91 | 94 |
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; | 95 base::hash_map<std::string, DistillerURLFetcher*> image_fetchers_; |
93 | 96 scoped_ptr<DistilledArticleProto> article_proto_; |
94 scoped_ptr<DistilledPageProto> proto_; | 97 bool distillation_in_progress_; |
98 // Keeps track of which URLs have already been seen by the distiller. | |
99 std::vector<std::string> processed_urls_; | |
95 | 100 |
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 101 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
97 }; | 102 }; |
98 | 103 |
99 } // namespace dom_distiller | 104 } // namespace dom_distiller |
100 | 105 |
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 106 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |