Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Unified Diff: components/dom_distiller/core/distiller.cc

Issue 146843010: Add support for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Chris' comments. Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/core/distiller.cc
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
index 84b8a0508db4eb8951c5b7443da8f6aa4cbe3d9b..f36e339a801d19ec503caa15317efa2857e0c2c5 100644
--- a/components/dom_distiller/core/distiller.cc
+++ b/components/dom_distiller/core/distiller.cc
@@ -8,16 +8,21 @@
#include "base/bind.h"
#include "base/callback.h"
-#include "base/strings/stringprintf.h"
+#include "base/location.h"
+#include "base/message_loop/message_loop.h"
+#include "base/strings/string_number_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/dom_distiller/core/distiller_page.h"
#include "components/dom_distiller/core/distiller_url_fetcher.h"
+#include "components/dom_distiller/core/proto/distilled_article.pb.h"
#include "components/dom_distiller/core/proto/distilled_page.pb.h"
-#include "grit/dom_distiller_resources.h"
#include "net/url_request/url_request_context_getter.h"
-#include "ui/base/resource/resource_bundle.h"
-#include "url/gurl.h"
+
+namespace {
+// Maximum number of distilled pages in a article.
cjhopman 2014/02/03 21:47:22 s/ a / an /
shashi 2014/02/03 23:19:29 Done.
+const int kMaxPagesInArticle = 32;
+}
namespace dom_distiller {
@@ -39,90 +44,108 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
DistillerImpl::DistillerImpl(
const DistillerPageFactory& distiller_page_factory,
const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
- : distiller_page_factory_(distiller_page_factory),
- distiller_url_fetcher_factory_(distiller_url_fetcher_factory) {
- distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass();
+ : distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
+ distillation_in_progress_(false) {
+ page_distiller_.reset(new PageDistiller(distiller_page_factory));
}
DistillerImpl::~DistillerImpl() {
}
void DistillerImpl::Init() {
- distiller_page_->Init();
+ DCHECK(!distillation_in_progress_);
+ page_distiller_->Init();
+ article_proto_.reset(new DistilledArticleProto());
}
void DistillerImpl::DistillPage(const GURL& url,
const DistillerCallback& distillation_cb) {
+ DCHECK(!distillation_in_progress_);
distillation_cb_ = distillation_cb;
- proto_.reset(new DistilledPageProto());
- proto_->set_url(url.spec());
- LoadURL(url);
+ DistillPage(url);
}
-void DistillerImpl::LoadURL(const GURL& url) {
- distiller_page_->LoadURL(url);
+void DistillerImpl::DistillPage(const GURL& url) {
+ DCHECK(!distillation_in_progress_);
+ if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle &&
+ processed_urls_.find(url.spec()) == processed_urls_.end()) {
+ distillation_in_progress_ = true;
+ // Distill the next page.
+ base::MessageLoop::current()->PostTask(
cjhopman 2014/02/03 21:47:22 Do we need to post a task here? Can't we just call
shashi 2014/02/03 23:19:29 I was afraid that it may recurse, because OnPageDi
cjhopman 2014/02/03 23:56:53 Ah, I see now. Now I think that we should either
shashi 2014/02/04 01:39:37 Done.
+ FROM_HERE,
+ base::Bind(
+ &DistillerImpl::DistillNextPage, base::Unretained(this), url));
+ } else {
+ CheckIfAllCallbacksAreFinished();
+ }
}
-void DistillerImpl::OnLoadURLDone() {
- GetDistilledContent();
+void DistillerImpl::DistillNextPage(const GURL& url) {
+ DCHECK(distillation_in_progress_);
+ DCHECK(url.is_valid());
+ DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle);
+ page_distiller_->DistillPage(
+ url,
+ base::Bind(&DistillerImpl::OnPageDistillationFinished,
+ base::Unretained(this),
+ url));
}
-void DistillerImpl::GetDistilledContent() {
- std::string script =
- ResourceBundle::GetSharedInstance().GetRawDataResource(
- IDR_DISTILLER_JS).as_string();
- distiller_page_->ExecuteJavaScript(script);
-}
+void DistillerImpl::OnPageDistillationFinished(
+ const GURL& page_url,
+ const DistilledPageInfo& distilled_page,
+ bool distillation_successful) {
+ DCHECK(distillation_in_progress_);
+ if (!distillation_successful) {
+ CheckIfAllCallbacksAreFinished();
+ } else {
+ DistilledPageProto* current_page = article_proto_->add_pages();
+ // Set the title of the article as the title of the first page.
+ if (article_proto_->pages_size() == 1) {
+ article_proto_->set_title(distilled_page.title);
+ }
-void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) {
- std::string result;
- bool fetched_image = false;
- const base::ListValue* result_list = NULL;
- if (!value->GetAsList(&result_list)) {
- DCHECK(proto_);
- distillation_cb_.Run(proto_.Pass());
- return;
- }
- int i = 0;
- for (base::ListValue::const_iterator iter = result_list->begin();
- iter != result_list->end(); ++iter, ++i) {
- std::string item;
- (*iter)->GetAsString(&item);
- // The JavaScript returns an array where the first element is the title,
- // the second element is the article content HTML, and the remaining
- // elements are image URLs referenced in the HTML.
- switch (i) {
- case 0:
- proto_->set_title(item);
- break;
- case 1:
- proto_->set_html(item);
- break;
- default:
- int image_number = i - 2;
- std::string image_id = base::StringPrintf("%d", image_number);
- FetchImage(image_id, item);
- fetched_image = true;
+ current_page->set_url(page_url.spec());
+ current_page->set_html(distilled_page.html);
+
+ GURL next_page_url(distilled_page.next_page_url);
+ if (next_page_url.is_valid()) {
+ // The pages should be in same origin.
+ DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
+ }
+
+ processed_urls_.insert(page_url.spec());
+ distillation_in_progress_ = false;
+ int page_number = article_proto_->pages_size();
+ for (size_t img_num = 0; img_num < distilled_page.image_urls.size();
+ ++img_num) {
+ std::string image_id =
+ base::IntToString(page_number) + "_" + base::IntToString(img_num);
+ FetchImage(current_page, image_id, distilled_page.image_urls[img_num]);
}
+ DistillPage(next_page_url);
}
- if (!fetched_image)
- distillation_cb_.Run(proto_.Pass());
}
-void DistillerImpl::FetchImage(const std::string& image_id,
+void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto,
+ const std::string& image_id,
const std::string& item) {
DistillerURLFetcher* fetcher =
distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
image_fetchers_[image_id] = fetcher;
fetcher->FetchURL(item,
base::Bind(&DistillerImpl::OnFetchImageDone,
- base::Unretained(this), image_id));
+ base::Unretained(this),
+ base::Unretained(distilled_page_proto),
+ image_id));
}
-void DistillerImpl::OnFetchImageDone(const std::string& id,
+void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto,
+ const std::string& id,
const std::string& response) {
- DCHECK(proto_);
- DistilledPageProto_Image* image = proto_->add_image();
+ DCHECK_GT(article_proto_->pages_size(), 0);
+ DCHECK(distilled_page_proto);
+ DistilledPageProto_Image* image = distilled_page_proto->add_image();
image->set_name(id);
image->set_data(response);
DCHECK(image_fetchers_.end() != image_fetchers_.find(id));
@@ -130,8 +153,12 @@ void DistillerImpl::OnFetchImageDone(const std::string& id,
int result = image_fetchers_.erase(id);
delete fetcher;
cjhopman 2014/02/03 21:47:22 It looks like there is a lot going on in this clas
shashi 2014/02/03 23:19:29 Done, filed: http://crbug.com/340431 On 2014/02/0
DCHECK_EQ(1, result);
- if (image_fetchers_.empty()) {
- distillation_cb_.Run(proto_.Pass());
+ CheckIfAllCallbacksAreFinished();
+}
+
+void DistillerImpl::CheckIfAllCallbacksAreFinished() {
cjhopman 2014/02/03 21:47:22 I don't like this function name. I would expect th
shashi 2014/02/03 23:19:29 Done.
+ if (image_fetchers_.empty() && !distillation_in_progress_) {
+ distillation_cb_.Run(article_proto_.Pass());
}
}

Powered by Google App Engine
This is Rietveld 408576698