OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <set> | |
9 #include <string> | 10 #include <string> |
10 | 11 |
11 #include "base/callback.h" | 12 #include "base/callback.h" |
12 #include "base/gtest_prod_util.h" | 13 #include "base/gtest_prod_util.h" |
13 #include "base/memory/ref_counted.h" | 14 #include "base/memory/ref_counted.h" |
14 #include "base/values.h" | 15 #include "base/values.h" |
15 #include "components/dom_distiller/core/distiller_page.h" | 16 #include "components/dom_distiller/core/distiller_page.h" |
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 17 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 18 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
18 #include "net/url_request/url_request_context_getter.h" | 19 #include "net/url_request/url_request_context_getter.h" |
19 #include "url/gurl.h" | 20 #include "url/gurl.h" |
20 | 21 |
21 namespace dom_distiller { | 22 namespace dom_distiller { |
22 | 23 |
23 class DistillerImpl; | 24 class DistillerImpl; |
24 | 25 |
25 class Distiller { | 26 class Distiller { |
26 public: | 27 public: |
27 typedef base::Callback<void( | 28 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)> |
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; | 29 DistillerCallback; |
29 virtual ~Distiller() {} | 30 virtual ~Distiller() {} |
30 | 31 |
31 // Distills a page, and asynchronously returns the article HTML to the | 32 // Distills a page, and asynchronously returns the article HTML to the |
32 // supplied callback. | 33 // supplied callback. |
33 virtual void DistillPage(const GURL& url, | 34 virtual void DistillPage(const GURL& url, |
34 const DistillerCallback& callback) = 0; | 35 const DistillerCallback& callback) = 0; |
35 }; | 36 }; |
36 | 37 |
37 class DistillerFactory { | 38 class DistillerFactory { |
38 public: | 39 public: |
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; | 40 virtual scoped_ptr<Distiller> CreateDistiller() = 0; |
40 virtual ~DistillerFactory() {} | 41 virtual ~DistillerFactory() {} |
41 }; | 42 }; |
42 | 43 |
43 // Factory for creating a Distiller. | 44 // Factory for creating a Distiller. |
44 class DistillerFactoryImpl : public DistillerFactory { | 45 class DistillerFactoryImpl : public DistillerFactory { |
45 public: | 46 public: |
46 DistillerFactoryImpl( | 47 DistillerFactoryImpl( |
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 48 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); | 49 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); |
49 virtual ~DistillerFactoryImpl(); | 50 virtual ~DistillerFactoryImpl(); |
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; | 51 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; |
51 | 52 |
52 private: | 53 private: |
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; | 54 scoped_ptr<DistillerPageFactory> distiller_page_factory_; |
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; | 55 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; |
55 }; | 56 }; |
56 | 57 |
57 // Distills an article from a page and associated pages. | 58 // Distills an article from a page and associated pages. |
cjhopman
2014/01/29 21:44:13
This class is starting to handle a lot of things:
shashi
2014/01/29 22:51:37
Makes sense, but I will prefer to do it in a separ
| |
58 class DistillerImpl : public Distiller, | 59 class DistillerImpl : public Distiller, |
59 public DistillerPage::Delegate { | 60 public DistillerPage::Delegate { |
60 public: | 61 public: |
61 DistillerImpl( | 62 DistillerImpl( |
62 const DistillerPageFactory& distiller_page_factory, | 63 const DistillerPageFactory& distiller_page_factory, |
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 64 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
64 virtual ~DistillerImpl(); | 65 virtual ~DistillerImpl(); |
65 | 66 |
66 // Creates an execution context. This must be called once before any calls are | 67 // Creates an execution context. This must be called once before any calls are |
67 // made to distill the page. | 68 // made to distill the page. |
68 virtual void Init(); | 69 virtual void Init(); |
69 | 70 |
70 virtual void DistillPage(const GURL& url, | 71 virtual void DistillPage(const GURL& url, |
71 const DistillerCallback& callback) OVERRIDE; | 72 const DistillerCallback& callback) OVERRIDE; |
72 | 73 |
73 // DistillerPage::Delegate | 74 // DistillerPage::Delegate |
74 virtual void OnLoadURLDone() OVERRIDE; | 75 virtual void OnLoadURLDone() OVERRIDE; |
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; | 76 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; |
76 | 77 |
77 void OnFetchImageDone(const std::string& id, const std::string& response); | 78 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
79 const std::string& id, | |
80 const std::string& response); | |
78 | 81 |
79 private: | 82 private: |
80 virtual void LoadURL(const GURL& url); | 83 virtual void LoadURL(const GURL& url); |
81 virtual void FetchImage(const std::string& image_id, const std::string& item); | 84 virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
85 const std::string& image_id, | |
86 const std::string& item); | |
82 | 87 |
83 // Injects JavaScript to distill a loaded page down to its important content, | 88 // Injects JavaScript to distill a loaded page down to its important content, |
84 // e.g., extracting a news article from its surrounding boilerplate. | 89 // e.g., extracting a news article from its surrounding boilerplate. |
85 void GetDistilledContent(); | 90 void GetDistilledContent(); |
86 | 91 |
92 // Adds url as a new page to the |article_proto_| and triggers distillation | |
93 // for the newly added page. | |
94 void AddAndDistillPage(const GURL& url); | |
95 | |
96 // Distills the page. | |
97 void DistillNextPage(const GURL& url); | |
98 | |
99 // Runs distillation callback when all distillation tasks are finished. | |
100 void DistillationTaskComplete(); | |
101 | |
102 // Returns the last page of |article_proto_|. | |
103 DistilledPageProto* GetLastPage() const; | |
104 | |
87 const DistillerPageFactory& distiller_page_factory_; | 105 const DistillerPageFactory& distiller_page_factory_; |
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 106 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
89 scoped_ptr<DistillerPage> distiller_page_; | 107 scoped_ptr<DistillerPage> distiller_page_; |
90 DistillerCallback distillation_cb_; | 108 DistillerCallback distillation_cb_; |
91 | 109 |
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; | 110 std::map<std::string, DistillerURLFetcher* > image_fetchers_; |
93 | 111 scoped_ptr<DistilledArticleProto> article_proto_; |
94 scoped_ptr<DistilledPageProto> proto_; | 112 bool distillation_in_progress_; |
113 // Set to keep track of which urls are already seen by the distiller. | |
114 std::set<std::string> processed_urls_; | |
95 | 115 |
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 116 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
97 }; | 117 }; |
98 | 118 |
99 } // namespace dom_distiller | 119 } // namespace dom_distiller |
100 | 120 |
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 121 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |