Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(85)

Side by Side Diff: components/dom_distiller/core/distiller.h

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Chris' comments. Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
7 7
8 #include <map> 8 #include <map>
9 #include <set>
9 #include <string> 10 #include <string>
10 11
11 #include "base/callback.h" 12 #include "base/callback.h"
12 #include "base/gtest_prod_util.h" 13 #include "base/memory/scoped_ptr.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/values.h"
15 #include "components/dom_distiller/core/distiller_page.h"
16 #include "components/dom_distiller/core/distiller_url_fetcher.h" 14 #include "components/dom_distiller/core/distiller_url_fetcher.h"
17 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 15 #include "components/dom_distiller/core/page_distiller.h"
16 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
19 #include "url/gurl.h" 18 #include "url/gurl.h"
20 19
21 namespace dom_distiller { 20 namespace dom_distiller {
22 21
23 class DistillerImpl; 22 class DistillerImpl;
24 23
25 class Distiller { 24 class Distiller {
26 public: 25 public:
27 typedef base::Callback<void( 26 typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
28 scoped_ptr<DistilledPageProto>)> DistillerCallback; 27 DistillerCallback;
29 virtual ~Distiller() {} 28 virtual ~Distiller() {}
30 29
31 // Distills a page, and asynchronously returns the article HTML to the 30 // Distills a page, and asynchronously returns the article HTML to the
32 // supplied callback. 31 // supplied callback.
33 virtual void DistillPage(const GURL& url, 32 virtual void DistillPage(const GURL& url,
34 const DistillerCallback& callback) = 0; 33 const DistillerCallback& callback) = 0;
35 }; 34 };
36 35
37 class DistillerFactory { 36 class DistillerFactory {
38 public: 37 public:
(...skipping 10 matching lines...) Expand all
49 virtual ~DistillerFactoryImpl(); 48 virtual ~DistillerFactoryImpl();
50 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE; 49 virtual scoped_ptr<Distiller> CreateDistiller() OVERRIDE;
51 50
52 private: 51 private:
53 scoped_ptr<DistillerPageFactory> distiller_page_factory_; 52 scoped_ptr<DistillerPageFactory> distiller_page_factory_;
54 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_; 53 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory_;
55 }; 54 };
56 55
57 // Distills an article from a page and associated pages. 56 // Distills an article from a page and associated pages.
58 class DistillerImpl : public Distiller, 57 class DistillerImpl : public Distiller,
59 public DistillerPage::Delegate { 58 public PageDistiller::PageDistillerCallback {
cjhopman 2014/02/03 21:47:22 This should not subclass a Callback.
shashi 2014/02/03 23:19:29 Done.
60 public: 59 public:
61 DistillerImpl( 60 DistillerImpl(
62 const DistillerPageFactory& distiller_page_factory, 61 const DistillerPageFactory& distiller_page_factory,
63 const DistillerURLFetcherFactory& distiller_url_fetcher_factory); 62 const DistillerURLFetcherFactory& distiller_url_fetcher_factory);
64 virtual ~DistillerImpl(); 63 virtual ~DistillerImpl();
65 64
66 // Creates an execution context. This must be called once before any calls are 65 // Creates an execution context. This must be called once before any calls are
67 // made to distill the page. 66 // made to distill the page.
68 virtual void Init(); 67 virtual void Init();
69 68
70 virtual void DistillPage(const GURL& url, 69 virtual void DistillPage(const GURL& url,
71 const DistillerCallback& callback) OVERRIDE; 70 const DistillerCallback& callback) OVERRIDE;
72 71
73 // PageDistillerContext::Delegate 72 void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
74 virtual void OnLoadURLDone() OVERRIDE; 73 const std::string& id,
75 virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE; 74 const std::string& response);
76 75
77 void OnFetchImageDone(const std::string& id, const std::string& response); 76 // PageDistiller::PageDistillerCallback implementation.
cjhopman 2014/02/03 21:47:22 This comment isn't really right. PageDistillerCall
shashi 2014/02/03 23:19:29 Done.
77 void OnPageDistillationFinished(const GURL& page_url,
78 const DistilledPageInfo& distilled_page,
79 bool distillation_successful);
78 80
79 private: 81 private:
80 virtual void LoadURL(const GURL& url); 82 virtual void FetchImage(DistilledPageProto* distilled_page_proto,
81 virtual void FetchImage(const std::string& image_id, const std::string& item); 83 const std::string& image_id,
84 const std::string& item);
82 85
83 // Injects JavaScript to distill a loaded page down to its important content, 86 // Adds url as a new page to the |article_proto_| and triggers distillation
84 // e.g., extracting a news article from its surrounding boilerplate. 87 // for the newly added page.
85 void GetDistilledContent(); 88 void DistillNextPage(const GURL& url);
cjhopman 2014/02/03 21:47:22 You should be thinking about how to handle the cas
shashi 2014/02/03 23:19:29 Good point, currently there is only a next page he
86 89
87 const DistillerPageFactory& distiller_page_factory_; 90 // Distills the page.
91 void DistillPage(const GURL& url);
92
93 // Checks if all distillation callbacks are finished and runs the
94 // |distillation_cb_| if callbacks are finished.
95 void CheckIfAllCallbacksAreFinished();
96
88 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_; 97 const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
89 scoped_ptr<DistillerPage> distiller_page_; 98 scoped_ptr<PageDistiller> page_distiller_;
90 DistillerCallback distillation_cb_; 99 DistillerCallback distillation_cb_;
91 100
92 std::map<std::string, DistillerURLFetcher* > image_fetchers_; 101 std::map<std::string, DistillerURLFetcher*> image_fetchers_;
93 102 scoped_ptr<DistilledArticleProto> article_proto_;
94 scoped_ptr<DistilledPageProto> proto_; 103 bool distillation_in_progress_;
104 // Set to keep track of which urls are already seen by the distiller.
105 std::set<std::string> processed_urls_;
cjhopman 2014/02/03 21:47:22 I'd say that this should probably be a hash_set (b
shashi 2014/02/03 23:19:29 Done; changed to use vector and std::find.
cjhopman 2014/02/03 23:56:53 Sorry, my comment was unclear. I really don't thin
shashi 2014/02/04 01:39:37 Ah, changed back to use hash_set. On 2014/02/03 23
95 106
96 DISALLOW_COPY_AND_ASSIGN(DistillerImpl); 107 DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
97 }; 108 };
98 109
99 } // namespace dom_distiller 110 } // namespace dom_distiller
100 111
101 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_ 112 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698