OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <map> | |
9 #include <string> | 8 #include <string> |
10 | 9 |
11 #include "base/callback.h" | 10 #include "base/callback.h" |
12 #include "base/gtest_prod_util.h" | 11 #include "base/containers/hash_tables.h" |
13 #include "base/memory/ref_counted.h" | 12 #include "base/memory/scoped_ptr.h" |
14 #include "base/values.h" | |
15 #include "components/dom_distiller/core/distiller_page.h" | |
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 13 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 14 #include "components/dom_distiller/core/page_distiller.h" |
| 15 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
18 #include "net/url_request/url_request_context_getter.h" | 16 #include "net/url_request/url_request_context_getter.h" |
19 #include "url/gurl.h" | 17 #include "url/gurl.h" |
20 | 18 |
21 namespace dom_distiller { | 19 namespace dom_distiller { |
22 | 20 |
23 class DistillerImpl; | 21 class DistillerImpl; |
24 | 22 |
25 class Distiller { | 23 class Distiller { |
26 public: | 24 public: |
27 typedef base::Callback<void( | 25 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)> |
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; | 26 DistillerCallback; |
29 virtual ~Distiller() {} | 27 virtual ~Distiller() {} |
30 | 28 |
31 // Distills a page, and asynchronously returns the article HTML to the | 29 // Distills a page, and asynchronously returns the article HTML to the |
32 // supplied callback. | 30 // supplied callback. |
33 virtual void DistillPage(const GURL& url, | 31 virtual void DistillPage(const GURL& url, |
34 const DistillerCallback& callback) = 0; | 32 const DistillerCallback& callback) = 0; |
35 }; | 33 }; |
36 | 34 |
37 class DistillerFactory { | 35 class DistillerFactory { |
38 public: | 36 public: |
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; | 37 virtual scoped_ptr<Distiller> CreateDistiller() = 0; |
40 virtual ~DistillerFactory() {} | 38 virtual ~DistillerFactory() {} |
41 }; | 39 }; |
42 | 40 |
43 // Factory for creating a Distiller. | 41 // Factory for creating a Distiller. |
44 class DistillerFactoryImpl : public DistillerFactory { | 42 class DistillerFactoryImpl : public DistillerFactory { |
45 public: | 43 public: |
46 DistillerFactoryImpl( | 44 DistillerFactoryImpl( |
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 45 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); | 46 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); |
49 virtual ~DistillerFactoryImpl(); | 47 virtual ~DistillerFactoryImpl(); |
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; | 48 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; |
51 | 49 |
52 private: | 50 private: |
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; | 51 scoped_ptr<DistillerPageFactory> distiller_page_factory_; |
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; | 52 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; |
55 }; | 53 }; |
56 | 54 |
57 // Distills an article from a page and associated pages. | 55 // Distills an article from a page and associated pages. |
58 class DistillerImpl : public Distiller, | 56 class DistillerImpl : public Distiller { |
59 public DistillerPage::Delegate { | |
60 public: | 57 public: |
61 DistillerImpl( | 58 DistillerImpl( |
62 const DistillerPageFactory& distiller_page_factory, | 59 const DistillerPageFactory& distiller_page_factory, |
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 60 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
64 virtual ~DistillerImpl(); | 61 virtual ~DistillerImpl(); |
65 | 62 |
66 // Creates an execution context. This must be called once before any calls are | 63 // Creates an execution context. This must be called once before any calls are |
67 // made to distill the page. | 64 // made to distill the page. |
68 virtual void Init(); | 65 virtual void Init(); |
69 | 66 |
70 virtual void DistillPage(const GURL& url, | 67 virtual void DistillPage(const GURL& url, |
71 const DistillerCallback& callback) OVERRIDE; | 68 const DistillerCallback& callback) OVERRIDE; |
72 | 69 |
73 // PageDistillerContext::Delegate | 70 private: |
74 virtual void OnLoadURLDone() OVERRIDE; | 71 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; | 72 const std::string& id, |
| 73 const std::string& response); |
76 | 74 |
77 void OnFetchImageDone(const std::string& id, const std::string& response); | 75 void OnPageDistillationFinished(const GURL& page_url, |
| 76 scoped_ptr<DistilledPageInfo> distilled_page, |
| 77 bool distillation_successful); |
78 | 78 |
79 private: | 79 virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
80 virtual void LoadURL(const GURL& url); | 80 const std::string& image_id, |
81 virtual void FetchImage(const std::string& image_id, const std::string& item); | 81 const std::string& item); |
82 | 82 |
83 // Injects JavaScript to distill a loaded page down to its important content, | 83 // Distills the page and adds the new page to |article_proto_|. |
84 // e.g., extracting a news article from its surrounding boilerplate. | 84 void DistillPage(const GURL& url); |
85 void GetDistilledContent(); | |
86 | 85 |
87 const DistillerPageFactory& distiller_page_factory_; | 86 // Runs |distillation_cb_| if all distillation callbacks and image fetches are |
| 87 // complete. |
| 88 void RunDistillerCallbackIfDone(); |
| 89 |
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 90 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
89 scoped_ptr<DistillerPage> distiller_page_; | 91 scoped_ptr<PageDistiller> page_distiller_; |
90 DistillerCallback distillation_cb_; | 92 DistillerCallback distillation_cb_; |
91 | 93 |
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; | 94 base::hash_map<std::string, DistillerURLFetcher*> image_fetchers_; |
93 | 95 scoped_ptr<DistilledArticleProto> article_proto_; |
94 scoped_ptr<DistilledPageProto> proto_; | 96 bool distillation_in_progress_; |
| 97 // Set to keep track of which urls are already seen by the distiller. |
| 98 base::hash_set<std::string> processed_urls_; |
95 | 99 |
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 100 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
97 }; | 101 }; |
98 | 102 |
99 } // namespace dom_distiller | 103 } // namespace dom_distiller |
100 | 104 |
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 105 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |