Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(238)

Unified Diff: components/dom_distiller/core/distiller.h

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Chris' comments. Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/core/distiller.h
diff --git a/components/dom_distiller/core/distiller.h b/components/dom_distiller/core/distiller.h
index adbd41b8cbb294a99f14f15740e6b40c839d767e..65bffc74ef5c8623f987f9e210c5cb56bd3d34ca 100644
--- a/components/dom_distiller/core/distiller.h
+++ b/components/dom_distiller/core/distiller.h
@@ -6,15 +6,14 @@
#define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_H_
#include <map>
+#include <set>
#include <string>
#include "base/callback.h"
-#include "base/gtest_prod_util.h"
-#include "base/memory/ref_counted.h"
-#include "base/values.h"
-#include "components/dom_distiller/core/distiller_page.h"
+#include "base/memory/scoped_ptr.h"
#include "components/dom_distiller/core/distiller_url_fetcher.h"
-#include "components/dom_distiller/core/proto/distilled_page.pb.h"
+#include "components/dom_distiller/core/page_distiller.h"
+#include "components/dom_distiller/core/proto/distilled_article.pb.h"
#include "net/url_request/url_request_context_getter.h"
#include "url/gurl.h"
@@ -24,8 +23,8 @@ class DistillerImpl;
class Distiller {
public:
- typedef base::Callback<void(
- scoped_ptr<DistilledPageProto>)> DistillerCallback;
+ typedef base::Callback<void(scoped_ptr<DistilledArticleProto>)>
+ DistillerCallback;
virtual ~Distiller() {}
// Distills a page, and asynchrounously returns the article HTML to the
@@ -56,7 +55,7 @@ class DistillerFactoryImpl : public DistillerFactory {
// Distills a article from a page and associated pages.
class DistillerImpl : public Distiller,
- public DistillerPage::Delegate {
+ public PageDistiller::PageDistillerCallback {
cjhopman 2014/02/03 21:47:22 This should not subclass a Callback.
shashi 2014/02/03 23:19:29 Done.
public:
DistillerImpl(
const DistillerPageFactory& distiller_page_factory,
@@ -70,28 +69,40 @@ class DistillerImpl : public Distiller,
virtual void DistillPage(const GURL& url,
const DistillerCallback& callback) OVERRIDE;
- // PageDistillerContext::Delegate
- virtual void OnLoadURLDone() OVERRIDE;
- virtual void OnExecuteJavaScriptDone(const base::Value* value) OVERRIDE;
+ void OnFetchImageDone(DistilledPageProto* distilled_page_proto,
+ const std::string& id,
+ const std::string& response);
- void OnFetchImageDone(const std::string& id, const std::string& response);
+ // PageDistiller::PageDistillerCallback implementation.
cjhopman 2014/02/03 21:47:22 This comment isn't really right. PageDistillerCall
shashi 2014/02/03 23:19:29 Done.
+ void OnPageDistillationFinished(const GURL& page_url,
+ const DistilledPageInfo& distilled_page,
+ bool distillation_successful);
private:
- virtual void LoadURL(const GURL& url);
- virtual void FetchImage(const std::string& image_id, const std::string& item);
+ virtual void FetchImage(DistilledPageProto* distilled_page_proto,
+ const std::string& image_id,
+ const std::string& item);
- // Injects JavaScript to distill a loaded page down to its important content,
- // e.g., extracting a news article from its surrounding boilerplate.
- void GetDistilledContent();
+ // Adds url as a new page to the |article_proto_| and triggers distillation
+ // for the newly added page.
+ void DistillNextPage(const GURL& url);
cjhopman 2014/02/03 21:47:22 You should be thinking about how to handle the cas
shashi 2014/02/03 23:19:29 Good point, currently there is only a next page he
+
+ // Distills the page.
+ void DistillPage(const GURL& url);
+
+ // Checks if all distillation callbacks are finished and runs the
+ // |distillation_cb_| if callbacks are finished.
+ void CheckIfAllCallbacksAreFinished();
- const DistillerPageFactory& distiller_page_factory_;
const DistillerURLFetcherFactory& distiller_url_fetcher_factory_;
- scoped_ptr<DistillerPage> distiller_page_;
+ scoped_ptr<PageDistiller> page_distiller_;
DistillerCallback distillation_cb_;
- std::map<std::string, DistillerURLFetcher* > image_fetchers_;
-
- scoped_ptr<DistilledPageProto> proto_;
+ std::map<std::string, DistillerURLFetcher*> image_fetchers_;
+ scoped_ptr<DistilledArticleProto> article_proto_;
+ bool distillation_in_progress_;
+ // Set to keep track of which urls are already seen by the distiller.
+ std::set<std::string> processed_urls_;
cjhopman 2014/02/03 21:47:22 I'd say that this should probably be a hash_set (b
shashi 2014/02/03 23:19:29 Done changed to use vector and std::find.
cjhopman 2014/02/03 23:56:53 Sorry, my comment was unclear. I really don't thin
shashi 2014/02/04 01:39:37 Ah, changed back to use hash_set. On 2014/02/03 23
DISALLOW_COPY_AND_ASSIGN(DistillerImpl);
};

Powered by Google App Engine
This is Rietveld 408576698