Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(410)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase + change Viewer to use DomDistillerArticleProto Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <map> 8 #include <map>
9 #include <set>
9 #include <string> 10 #include <string>
10 11
11 #include "base/callback.h" 12 #include "base/callback.h"
12 #include "base/gtest_prod_util.h" 13 #include "base/gtest_prod_util.h"
13 #include "base/memory/ref_counted.h" 14 #include "base/memory/ref_counted.h"
14 #include "base/values.h" 15 #include "base/values.h"
15 #include "components/dom_distiller/core/distiller_page.h" 16 #include "components/dom_distiller/core/distiller_page.h"
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" 17 #include "components/dom_distiller/core/distiller_url_fetcher.h"
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 18 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "net/url_request/url_request_context_getter.h" 19 #include "net/url_request/url_request_context_getter.h"
19 #include "url/gurl.h" 20 #include "url/gurl.h"
20 21
21 namespace dom_distiller { 22 namespace dom_distiller {
22 23
23 class DistillerImpl; 24 class DistillerImpl;
24 25
25 class Distiller { 26 class Distiller {
26 public: 27 public:
27 typedef base::Callback<void( 28 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; 29 DistillerCallback;
29 virtual ~Distiller() {} 30 virtual ~Distiller() {}
30 31
31 // Distills a page, and asynchronously returns the article HTML to the 32 // Distills a page, and asynchronously returns the article HTML to the
32 // supplied callback. 33 // supplied callback.
33 virtual void DistillPage(const GURL& url, 34 virtual void DistillPage(const GURL& url,
34 const DistillerCallback& callback) = 0; 35 const DistillerCallback& callback) = 0;
35 }; 36 };
36 37
37 class DistillerFactory { 38 class DistillerFactory {
38 public: 39 public:
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; 40 virtual scoped_ptr<Distiller> CreateDistiller() = 0;
40 virtual ~DistillerFactory() {} 41 virtual ~DistillerFactory() {}
41 }; 42 };
42 43
43 // Factory for creating a Distiller. 44 // Factory for creating a Distiller.
44 class DistillerFactoryImpl : public DistillerFactory { 45 class DistillerFactoryImpl : public DistillerFactory {
45 public: 46 public:
46 DistillerFactoryImpl( 47 DistillerFactoryImpl(
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, 48 scoped_ptr<DistillerPageFactory> distiller_page_factory,
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); 49 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory);
49 virtual ~DistillerFactoryImpl(); 50 virtual ~DistillerFactoryImpl();
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; 51 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
51 52
52 private: 53 private:
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; 54 scoped_ptr<DistillerPageFactory> distiller_page_factory_;
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; 55 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
55 }; 56 };
56 57
57 // Distills an article from a page and associated pages. 58 // Distills an article from a page and associated pages.
cjhopman 2014/01/29 21:44:13 This class is starting to handle a lot of things:
shashi 2014/01/29 22:51:37 Makes sense, but I will prefer to do it in a separ
58 class DistillerImpl : public Distiller, 59 class DistillerImpl : public Distiller,
59 public DistillerPage::Delegate { 60 public DistillerPage::Delegate {
60 public: 61 public:
61 DistillerImpl( 62 DistillerImpl(
62 const DistillerPageFactory& distiller_page_factory, 63 const DistillerPageFactory& distiller_page_factory,
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 64 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
64 virtual ~DistillerImpl(); 65 virtual ~DistillerImpl();
65 66
66 // Creates an execution context. This must be called once before any calls are 67 // Creates an execution context. This must be called once before any calls are
67 // made to distill the page. 68 // made to distill the page.
68 virtual void Init(); 69 virtual void Init();
69 70
70 virtual void DistillPage(const GURL& url, 71 virtual void DistillPage(const GURL& url,
71 const DistillerCallback& callback) OVERRIDE; 72 const DistillerCallback& callback) OVERRIDE;
72 73
73 // DistillerPage::Delegate 74 // DistillerPage::Delegate
74 virtual void OnLoadURLDone() OVERRIDE; 75 virtual void OnLoadURLDone() OVERRIDE;
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; 76 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE;
76 77
77 void OnFetchImageDone(const std::string& id, const std::string& response); 78 void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
79 const std::string& id,
80 const std::string& response);
78 81
79 private: 82 private:
80 virtual void LoadURL(const GURL& url); 83 virtual void LoadURL(const GURL& url);
81 virtual void FetchImage(const std::string& image_id, const std::string& item); 84 virtual void FetchImage(DistilledPageProto* distilled_page_proto,
85 const std::string& image_id,
86 const std::string& item);
82 87
83 // Injects JavaScript to distill a loaded page down to its important content, 88 // Injects JavaScript to distill a loaded page down to its important content,
84 // e.g., extracting a news article from its surrounding boilerplate. 89 // e.g., extracting a news article from its surrounding boilerplate.
85 void GetDistilledContent(); 90 void GetDistilledContent();
86 91
92 // Adds url as a new page to the |article_proto_| and triggers distillation
93 // for the newly added page.
94 void AddAndDistillPage(const GURL& url);
95
96 // Distills the page.
97 void DistillNextPage(const GURL& url);
98
99 // Runs distillation callback when all distillation tasks are finished.
100 void DistillationTaskComplete();
101
102 // Returns the last page of |article_proto_|.
103 DistilledPageProto* GetLastPage() const;
104
87 const DistillerPageFactory& distiller_page_factory_; 105 const DistillerPageFactory& distiller_page_factory_;
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 106 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
89 scoped_ptr<DistillerPage> distiller_page_; 107 scoped_ptr<DistillerPage> distiller_page_;
90 DistillerCallback distillation_cb_; 108 DistillerCallback distillation_cb_;
91 109
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; 110 std::map<std::string, DistillerURLFetcher* > image_fetchers_;
93 111 scoped_ptr<DistilledArticleProto> article_proto_;
94 scoped_ptr<DistilledPageProto> proto_; 112 bool distillation_in_progress_;
113 // Set to keep track of which urls are already seen by the distiller.
114 std::set<std::string> processed_urls_;
95 115
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 116 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
97 }; 117 };
98 118
99 } // namespace dom_distiller 119 } // namespace dom_distiller
100 120
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 121 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698