Index: components/dom_distiller/core/distiller.cc |
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc |
index 84b8a0508db4eb8951c5b7443da8f6aa4cbe3d9b..0f25e694a56023d5c876f554f72b787d59ffed6f 100644 |
--- a/components/dom_distiller/core/distiller.cc |
+++ b/components/dom_distiller/core/distiller.cc |
@@ -8,17 +8,26 @@ |
#include "base/bind.h" |
#include "base/callback.h" |
+#include "base/location.h" |
+#include "base/message_loop/message_loop.h" |
+#include "base/strings/string_number_conversions.h" |
#include "base/strings/stringprintf.h" |
#include "base/strings/utf_string_conversions.h" |
#include "base/values.h" |
#include "components/dom_distiller/core/distiller_page.h" |
#include "components/dom_distiller/core/distiller_url_fetcher.h" |
+#include "components/dom_distiller/core/proto/distilled_article.pb.h" |
#include "components/dom_distiller/core/proto/distilled_page.pb.h" |
#include "grit/dom_distiller_resources.h" |
#include "net/url_request/url_request_context_getter.h" |
#include "ui/base/resource/resource_bundle.h" |
#include "url/gurl.h" |
+namespace { |
+// Maximum number of distilled pages in a article. |
+const int kMaxPagesInArticle = 32; |
+} |
+ |
namespace dom_distiller { |
DistillerFactoryImpl::DistillerFactoryImpl( |
@@ -39,8 +48,9 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
DistillerImpl::DistillerImpl( |
const DistillerPageFactory& distiller_page_factory, |
const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
- : distiller_page_factory_(distiller_page_factory), |
- distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { |
+ : distiller_page_factory_(distiller_page_factory), |
+ distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
+ distillation_in_progress_(false) { |
distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); |
} |
@@ -48,14 +58,39 @@ DistillerImpl::~DistillerImpl() { |
} |
void DistillerImpl::Init() { |
+ DCHECK(!distillation_in_progress_); |
distiller_page_->Init(); |
+ article_proto_.reset(new DistilledArticleProto()); |
} |
void DistillerImpl::DistillPage(const GURL& url, |
const DistillerCallback& distillation_cb) { |
+ DCHECK(!distillation_in_progress_); |
distillation_cb_ = distillation_cb; |
- proto_.reset(new DistilledPageProto()); |
- proto_->set_url(url.spec()); |
+ DistillNextPage(url); |
+} |
+ |
+void DistillerImpl::DistillNextPage(const GURL& url) { |
+ DCHECK(!distillation_in_progress_); |
+ if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
+ processed_urls_.find(url.spec()) == processed_urls_.end()) { |
+ distillation_in_progress_ = true; |
+ // Distill the next page. |
+ base::MessageLoop::current()->PostTask( |
+ FROM_HERE, |
+ base::Bind( |
+ &DistillerImpl::AddAndDistillPage, base::Unretained(this), url)); |
+ } else { |
+ DistillationTaskComplete(); |
+ } |
+} |
+ |
+void DistillerImpl::AddAndDistillPage(const GURL& url) { |
+ DCHECK(distillation_in_progress_); |
+ DCHECK(url.is_valid()); |
+ DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
+ DistilledPageProto* page_proto = article_proto_->add_pages(); |
Yaron
2014/01/29 20:03:41
Why do you add the page before distillation occurs
shashi
2014/01/29 22:51:37
Done. No, it was just inline with previous code th
|
+ page_proto->set_url(url.spec()); |
LoadURL(url); |
} |
@@ -75,14 +110,21 @@ void DistillerImpl::GetDistilledContent() { |
} |
void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { |
+ DCHECK(distillation_in_progress_); |
+ |
std::string result; |
- bool fetched_image = false; |
const base::ListValue* result_list = NULL; |
if (!value->GetAsList(&result_list)) { |
- DCHECK(proto_); |
- distillation_cb_.Run(proto_.Pass()); |
+ distillation_in_progress_ = false; |
+ DistillationTaskComplete(); |
return; |
} |
+ |
+ int index = article_proto_->pages_size() - 1; |
+ DCHECK_GE(index, 0); |
+ |
+ DistilledPageProto* current_page = GetLastPage(); |
Yaron
2014/01/29 20:03:41
While we aren't planning to do in-parallel distill
shashi
2014/01/29 22:51:37
Parallel distillation will require creating multip
|
+ GURL next_page_url; |
int i = 0; |
for (base::ListValue::const_iterator iter = result_list->begin(); |
iter != result_list->end(); ++iter, ++i) { |
@@ -93,36 +135,61 @@ void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { |
// elements are image URLs referenced in the HTML. |
switch (i) { |
case 0: |
- proto_->set_title(item); |
+ // Set the title of the article as the title of the first page. |
+ if (article_proto_->pages_size() == 1) |
+ article_proto_->set_title(item); |
break; |
case 1: |
- proto_->set_html(item); |
+ current_page->set_html(item); |
break; |
+ case 2: { |
+ next_page_url = GURL(item); |
+ if (next_page_url.is_valid()) { |
+ GURL current_page_url(current_page->url()); |
+ // The pages should be in same origin. |
+ DCHECK_EQ(next_page_url.GetOrigin(), current_page_url.GetOrigin()); |
+ } |
+ break; |
+ } |
default: |
- int image_number = i - 2; |
- std::string image_id = base::StringPrintf("%d", image_number); |
- FetchImage(image_id, item); |
- fetched_image = true; |
+ int page_number = article_proto_->pages_size(); |
+ int image_number = i - 3; |
+ std::string image_id = base::IntToString(page_number) + "_" + |
+ base::IntToString(image_number); |
+ FetchImage(current_page, image_id, item); |
} |
} |
- if (!fetched_image) |
- distillation_cb_.Run(proto_.Pass()); |
+ processed_urls_.insert(current_page->url()); |
+ distillation_in_progress_ = false; |
+ DistillNextPage(next_page_url); |
} |
-void DistillerImpl::FetchImage(const std::string& image_id, |
+DistilledPageProto* DistillerImpl::GetLastPage() const { |
+ DCHECK_GT(article_proto_->pages_size(), 0); |
+ int index = article_proto_->pages_size() - 1; |
+ DCHECK_GE(index, 0); |
+ return article_proto_->mutable_pages(index); |
+} |
+ |
+void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
+ const std::string& image_id, |
const std::string& item) { |
DistillerURLFetcher* fetcher = |
distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
image_fetchers_[image_id] = fetcher; |
fetcher->FetchURL(item, |
base::Bind(&DistillerImpl::OnFetchImageDone, |
- base::Unretained(this), image_id)); |
+ base::Unretained(this), |
+ base::Unretained(distilled_page_proto), |
+ image_id)); |
} |
-void DistillerImpl::OnFetchImageDone(const std::string& id, |
+void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
+ const std::string& id, |
const std::string& response) { |
- DCHECK(proto_); |
- DistilledPageProto_Image* image = proto_->add_image(); |
+ DCHECK_GT(article_proto_->pages_size(), 0); |
+ DCHECK(distilled_page_proto); |
+ DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
image->set_name(id); |
image->set_data(response); |
DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
@@ -130,8 +197,12 @@ void DistillerImpl::OnFetchImageDone(const std::string& id, |
int result = image_fetchers_.erase(id); |
delete fetcher; |
DCHECK_EQ(1, result); |
- if (image_fetchers_.empty()) { |
- distillation_cb_.Run(proto_.Pass()); |
+ DistillationTaskComplete(); |
Yaron
2014/01/29 20:03:41
Naming feels wrong because of this case. I had to
shashi
2014/01/29 22:51:37
Changed, hopefully better.
On 2014/01/29 20:03:41,
|
+} |
+ |
+void DistillerImpl::DistillationTaskComplete() { |
+ if (image_fetchers_.empty() && !distillation_in_progress_) { |
+ distillation_cb_.Run(article_proto_.Pass()); |
} |
} |