Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(154)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase address comments. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <map>
9 #include <string> 8 #include <string>
9 #include <vector>
10 10
11 #include "base/callback.h" 11 #include "base/callback.h"
12 #include "base/gtest_prod_util.h" 12 #include "base/containers/hash_tables.h"
13 #include "base/memory/ref_counted.h" 13 #include "base/memory/scoped_ptr.h"
14 #include "base/values.h"
15 #include "components/dom_distiller/core/distiller_page.h"
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" 14 #include "components/dom_distiller/core/distiller_url_fetcher.h"
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 15 #include "components/dom_distiller/core/page_distiller.h"
16 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
19 #include "url/gurl.h" 18 #include "url/gurl.h"
20 19
21 namespace dom_distiller { 20 namespace dom_distiller {
22 21
23 class DistillerImpl; 22 class DistillerImpl;
24 23
25 class Distiller { 24 class Distiller {
26 public: 25 public:
27 typedef base::Callback<void( 26 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; 27 DistillerCallback;
29 virtual ~Distiller() {} 28 virtual ~Distiller() {}
30 29
31 // Distills a page, and asynchronously returns the article HTML to the 30 // Distills a page, and asynchronously returns the article HTML to the
32 // supplied callback. 31 // supplied callback.
33 virtual void DistillPage(const GURL& url, 32 virtual void DistillPage(const GURL& url,
34 const DistillerCallback& callback) = 0; 33 const DistillerCallback& callback) = 0;
35 }; 34 };
36 35
37 class DistillerFactory { 36 class DistillerFactory {
38 public: 37 public:
39 virtual scoped_ptr<Distiller> CreateDistiller() = 0; 38 virtual scoped_ptr<Distiller> CreateDistiller() = 0;
40 virtual ~DistillerFactory() {} 39 virtual ~DistillerFactory() {}
41 }; 40 };
42 41
43 // Factory for creating a Distiller. 42 // Factory for creating a Distiller.
44 class DistillerFactoryImpl : public DistillerFactory { 43 class DistillerFactoryImpl : public DistillerFactory {
45 public: 44 public:
46 DistillerFactoryImpl( 45 DistillerFactoryImpl(
47 scoped_ptr<DistillerPageFactory> distiller_page_factory, 46 scoped_ptr<DistillerPageFactory> distiller_page_factory,
48 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory); 47 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory);
49 virtual ~DistillerFactoryImpl(); 48 virtual ~DistillerFactoryImpl();
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; 49 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
51 50
52 private: 51 private:
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; 52 scoped_ptr<DistillerPageFactory> distiller_page_factory_;
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; 53 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
55 }; 54 };
56 55
57 // Distills an article from a page and associated pages. 56 // Distills an article from a page and associated pages.
58 class DistillerImpl : public Distiller, 57 class DistillerImpl : public Distiller {
59 public DistillerPage::Delegate {
60 public: 58 public:
61 DistillerImpl( 59 DistillerImpl(
62 const DistillerPageFactory& distiller_page_factory, 60 const DistillerPageFactory& distiller_page_factory,
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 61 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
64 virtual ~DistillerImpl(); 62 virtual ~DistillerImpl();
65 63
66 // Creates an execution context. This must be called once before any calls are 64 // Creates an execution context. This must be called once before any calls are
67 // made to distill the page. 65 // made to distill the page.
68 virtual void Init(); 66 virtual void Init();
69 67
70 virtual void DistillPage(const GURL& url, 68 virtual void DistillPage(const GURL& url,
71 const DistillerCallback& callback) OVERRIDE; 69 const DistillerCallback& callback) OVERRIDE;
72 70
73 // PageDistillerContext::Delegate 71 void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
cjhopman 2014/02/03 23:56:53 nit: should be private
shashi 2014/02/04 01:39:37 Done.
74 virtual void OnLoadURLDone() OVERRIDE; 72 const std::string& id,
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; 73 const std::string& response);
76 74
77 void OnFetchImageDone(const std::string& id, const std::string& response); 75 void OnPageDistillationFinished(const GURL& page_url,
cjhopman 2014/02/03 23:56:53 nit: should be private
shashi 2014/02/04 01:39:37 Done.
76 const DistilledPageInfo& distilled_page,
77 bool distillation_successful);
78 78
79 private: 79 private:
80 virtual void LoadURL(const GURL& url); 80 virtual void FetchImage(DistilledPageProto* distilled_page_proto,
81 virtual void FetchImage(const std::string& image_id, const std::string& item); 81 const std::string& image_id,
82 const std::string& item);
82 83
83 // Injects JavaScript to distill a loaded page down to its important content, 84 // Distills the page and adds the new page to |article_proto|.
84 // e.g., extracting a news article from its surrounding boilerplate. 85 void DistillPage(const GURL& url);
85 void GetDistilledContent();
86 86
87 const DistillerPageFactory& distiller_page_factory_; 87 // Checks if all distillation callbacks are finished and runs the
cjhopman 2014/02/03 23:56:53 nit: This reads a little awkward, how about "Runs
shashi 2014/02/04 01:39:37 Done.
88 // |distillation_cb_| if callbacks are finished.
89 void RunDistillerCallbackIfDone();
90
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 91 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
89 scoped_ptr<DistillerPage> distiller_page_; 92 scoped_ptr<PageDistiller> page_distiller_;
90 DistillerCallback distillation_cb_; 93 DistillerCallback distillation_cb_;
91 94
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; 95 base::hash_map<std::string, DistillerURLFetcher*> image_fetchers_;
93 96 scoped_ptr<DistilledArticleProto> article_proto_;
94 scoped_ptr<DistilledPageProto> proto_; 97 bool distillation_in_progress_;
98 // Set to keep track of which urls are already seen by the distiller.
99 std::vector<std::string> processed_urls_;
95 100
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 101 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
97 }; 102 };
98 103
99 } // namespace dom_distiller 104 } // namespace dom_distiller
100 105
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 106 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698