OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <set> | |
9 #include <string> | 10 #include <string> |
10 | 11 |
11 #include "base/callback.h" | 12 #include "base/callback.h" |
12 #include "base/gtest_prod_util.h" | 13 #include "base/memory/scoped_ptr.h" |
13 #include "base/memory/ref_counted.h" | |
14 #include "base/values.h" | |
15 #include "components/dom_distiller/core/distiller_page.h" | |
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 14 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 15 #include "components/dom_distiller/core/page_distiller.h" |
16 #include "components/dom_distiller/core/proto/distilled_article.pb.h" | |
18 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
19 #include "url/gurl.h" | 18 #include "url/gurl.h" |
20 | 19 |
21 namespace dom_distiller { | 20 namespace dom_distiller { |
22 | 21 |
23 class DistillerImpl; | 22 class DistillerImpl; |
24 | 23 |
25 class Distiller { | 24 class Distiller { |
26 public: | 25 public: |
27 typedef base::Callback<void( | 26 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)> |
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; | 27 DistillerCallback; |
29 virtual ~Distiller() {} | 28 virtual ~Distiller() {} |
30 | 29 |
31 // Distills a page, and asynchronously returns the article HTML to the | 30 // Distills a page, and asynchronously returns the article HTML to the |
32 // supplied callback. | 31 // supplied callback. |
33 virtual void DistillPage(const GURL& url, | 32 virtual void DistillPage(const GURL& url, |
34 const DistillerCallback& callback) = 0; | 33 const DistillerCallback& callback) = 0; |
35 }; | 34 }; |
36 | 35 |
37 class DistillerFactory { | 36 class DistillerFactory { |
38 public: | 37 public: |
(...skipping 10 matching lines...) Expand all Loading... | |
49 virtual ~DistillerFactoryImpl(); | 48 virtual ~DistillerFactoryImpl(); |
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; | 49 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; |
51 | 50 |
52 private: | 51 private: |
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; | 52 scoped_ptr<DistillerPageFactory> distiller_page_factory_; |
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; | 53 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; |
55 }; | 54 }; |
56 | 55 |
57 // Distills an article from a page and associated pages. | 56 // Distills an article from a page and associated pages. |
58 class DistillerImpl : public Distiller, | 57 class DistillerImpl : public Distiller, |
59 public DistillerPage::Delegate { | 58 public PageDistiller::PageDistillerCallback { |
cjhopman
2014/02/03 21:47:22
This should not subclass a Callback.
shashi
2014/02/03 23:19:29
Done.
| |
60 public: | 59 public: |
61 DistillerImpl( | 60 DistillerImpl( |
62 const DistillerPageFactory& distiller_page_factory, | 61 const DistillerPageFactory& distiller_page_factory, |
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); | 62 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); |
64 virtual ~DistillerImpl(); | 63 virtual ~DistillerImpl(); |
65 | 64 |
66 // Creates an execution context. This must be called once before any calls are | 65 // Creates an execution context. This must be called once before any calls are |
67 // made to distill the page. | 66 // made to distill the page. |
68 virtual void Init(); | 67 virtual void Init(); |
69 | 68 |
70 virtual void DistillPage(const GURL& url, | 69 virtual void DistillPage(const GURL& url, |
71 const DistillerCallback& callback) OVERRIDE; | 70 const DistillerCallback& callback) OVERRIDE; |
72 | 71 |
73 // PageDistillerContext::Delegate | 72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
74 virtual void OnLoadURLDone() OVERRIDE; | 73 const std::string& id, |
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; | 74 const std::string& response); |
76 | 75 |
77 void OnFetchImageDone(const std::string& id, const std::string& response); | 76 // PageDistiller::PageDistillerCallback implementation. |
cjhopman
2014/02/03 21:47:22
This comment isn't really right. PageDistillerCall
shashi
2014/02/03 23:19:29
Done.
| |
77 void OnPageDistillationFinished(const GURL& page_url, | |
78 const DistilledPageInfo& distilled_page, | |
79 bool distillation_successful); | |
78 | 80 |
79 private: | 81 private: |
80 virtual void LoadURL(const GURL& url); | 82 virtual void FetchImage(DistilledPageProto* distilled_page_proto, |
81 virtual void FetchImage(const std::string& image_id, const std::string& item); | 83 const std::string& image_id, |
84 const std::string& item); | |
82 | 85 |
83 // Injects JavaScript to distill a loaded page down to its important content, | 86 // Adds url as a new page to the |article_proto_| and triggers distillation |
84 // e.g., extracting a news article from its surrounding boilerplate. | 87 // for the newly added page. |
85 void GetDistilledContent(); | 88 void DistillNextPage(const GURL& url); |
cjhopman
2014/02/03 21:47:22
You should be thinking about how to handle the cas
shashi
2014/02/03 23:19:29
Good point, currently there is only a next page he
| |
86 | 89 |
87 const DistillerPageFactory& distiller_page_factory_; | 90 // Distills the page. |
91 void DistillPage(const GURL& url); | |
92 | |
93 // Checks if all distillation callbacks are finished and runs the | |
94 // |distillation_cb_| if callbacks are finished. | |
95 void CheckIfAllCallbacksAreFinished(); | |
96 | |
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; | 97 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; |
89 scoped_ptr<DistillerPage> distiller_page_; | 98 scoped_ptr<PageDistiller> page_distiller_; |
90 DistillerCallback distillation_cb_; | 99 DistillerCallback distillation_cb_; |
91 | 100 |
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; | 101 std::map<std::string, DistillerURLFetcher*> image_fetchers_; |
93 | 102 scoped_ptr<DistilledArticleProto> article_proto_; |
94 scoped_ptr<DistilledPageProto> proto_; | 103 bool distillation_in_progress_; |
104 // Set to keep track of which urls are already seen by the distiller. | |
105 std::set<std::string> processed_urls_; | |
cjhopman
2014/02/03 21:47:22
I'd say that this should probably be a hash_set (b
shashi
2014/02/03 23:19:29
Done changed to use vector and std::find.
cjhopman
2014/02/03 23:56:53
Sorry, my comment was unclear. I really don't thin
shashi
2014/02/04 01:39:37
Ah, changed back to use hash_set.
On 2014/02/03 23
| |
95 | 106 |
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); | 107 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); |
97 }; | 108 }; |
98 | 109 |
99 } // namespace dom_distiller | 110 } // namespace dom_distiller |
100 | 111 |
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ | 112 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ |
OLD | NEW |