Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(199)

Unified Diff: components/dom_distiller/core/distiller.cc

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/core/distiller.cc
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
index 8e155ebc2788356c16cdd2fcfde01df6df3a457b..e2abe3f57e5d40df204658795b5fa3ee5ebed97e 100644
--- a/components/dom_distiller/core/distiller.cc
+++ b/components/dom_distiller/core/distiller.cc
@@ -41,37 +41,53 @@ scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
return distiller.PassAs<Distiller>();
}
+DistillerImpl::DistilledPageData::DistilledPageData() {}
+
+DistillerImpl::DistilledPageData::~DistilledPageData() {}
+
DistillerImpl::DistillerImpl(
const DistillerPageFactory& distiller_page_factory,
const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
- : distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
- distillation_in_progress_(false) {
+ : distiller_url_fetcher_factory_(distiller_url_fetcher_factory) {
page_distiller_.reset(new PageDistiller(distiller_page_factory));
}
-DistillerImpl::~DistillerImpl() {
- DCHECK(image_fetchers_.empty());
- DCHECK(!distillation_in_progress_);
-}
+DistillerImpl::~DistillerImpl() { DCHECK(in_progress_pages_.empty()); }
void DistillerImpl::Init() {
- DCHECK(!distillation_in_progress_);
+ DCHECK(in_progress_pages_.empty());
page_distiller_->Init();
article_proto_.reset(new DistilledArticleProto());
}
+void DistillerImpl::AddToDistillationQueue(int page_no, const GURL& url) {
+ if (!IsValidPageNo(page_no) && url.is_valid() &&
cjhopman 2014/02/12 20:39:09 Maybe rename IsValidPageNo to IsUsedPageNumber or
shashi 2014/02/13 01:03:11 Done.
+ article_proto_->pages_size() < kMaxPagesInArticle &&
cjhopman 2014/02/12 20:39:09 Should this account for pages in the queue but not
cjhopman 2014/02/12 20:39:09 Also, article_proto_ isn't actually created until
shashi 2014/02/13 01:03:11 Good catch, I missed that, added a test. On 2014/0
+ processed_urls_.find(url.spec()) == processed_urls_.end()) {
+ in_progress_pages_.insert(page_no);
+ pages_to_be_distilled_[page_no] = url;
+ }
+}
+
+bool DistillerImpl::IsValidPageNo(int page_no) const {
+ return in_progress_pages_.find(page_no) != in_progress_pages_.end() ||
+ distilled_pages_index_.find(page_no) != distilled_pages_index_.end();
+}
+
void DistillerImpl::DistillPage(const GURL& url,
const DistillerCallback& distillation_cb) {
- DCHECK(!distillation_in_progress_);
+ DCHECK(in_progress_pages_.empty());
distillation_cb_ = distillation_cb;
- DistillPage(url);
+
+ AddToDistillationQueue(0, url);
+ DistillNextPage();
}
-void DistillerImpl::DistillPage(const GURL& url) {
- DCHECK(!distillation_in_progress_);
- if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle &&
- processed_urls_.find(url.spec()) == processed_urls_.end()) {
- distillation_in_progress_ = true;
+void DistillerImpl::DistillNextPage() {
+ if (!pages_to_be_distilled_.empty()) {
+ std::map<int, GURL>::const_iterator front = pages_to_be_distilled_.begin();
+ int page_no = front->first;
+ const GURL url = front->second;
// Distill the next page.
DCHECK(url.is_valid());
DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle);
@@ -79,26 +95,27 @@ void DistillerImpl::DistillPage(const GURL& url) {
url,
base::Bind(&DistillerImpl::OnPageDistillationFinished,
base::Unretained(this),
- url));
- } else {
- RunDistillerCallbackIfDone();
+ page_no));
}
}
void DistillerImpl::OnPageDistillationFinished(
- const GURL& page_url,
+ int page_no,
scoped_ptr<DistilledPageInfo> distilled_page,
bool distillation_successful) {
- DCHECK(distillation_in_progress_);
DCHECK(distilled_page.get());
- if (!distillation_successful) {
- RunDistillerCallbackIfDone();
- } else {
- DistilledPageProto* current_page = article_proto_->add_pages();
- // Set the title of the article as the title of the first page.
- if (article_proto_->pages_size() == 1) {
- article_proto_->set_title(distilled_page->title);
- }
+ DCHECK(IsValidPageNo(page_no));
+ DCHECK(pages_to_be_distilled_.end() != pages_to_be_distilled_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap the order of this comparison: foo.find(_
shashi 2014/02/13 01:03:11 I also thought so, but replicated the style in thi
+ const GURL page_url = pages_to_be_distilled_[page_no];
+ pages_to_be_distilled_.erase(page_no);
+ if (distillation_successful) {
+ DistilledPageProto* current_page = new DistilledPageProto();
+ DistilledPageData* page_data = new DistilledPageData();
+ page_data->proto.reset(current_page);
+ page_data->page_no = page_no;
+ page_data->title = distilled_page->title;
+ distilled_pages_.push_back(page_data);
+ distilled_pages_index_[page_no] = distilled_pages_.size() - 1;
current_page->set_url(page_url.spec());
current_page->set_html(distilled_page->html);
@@ -110,56 +127,101 @@ void DistillerImpl::OnPageDistillationFinished(
}
processed_urls_.insert(page_url.spec());
- distillation_in_progress_ = false;
- int page_number = article_proto_->pages_size();
for (size_t img_num = 0; img_num < distilled_page->image_urls.size();
++img_num) {
std::string image_id =
- base::IntToString(page_number) + "_" + base::IntToString(img_num);
- FetchImage(current_page, image_id, distilled_page->image_urls[img_num]);
+ base::IntToString(page_no + 1) + "_" + base::IntToString(img_num);
+ FetchImage(page_data, image_id, distilled_page->image_urls[img_num]);
}
- DistillPage(next_page_url);
+
+ AddToDistillationQueue(page_no + 1, next_page_url);
+ CheckIfPageDone(page_data);
+ DistillNextPage();
+ } else {
+ in_progress_pages_.erase(page_no);
cjhopman 2014/02/12 20:39:09 I don't think this is properly handled in RunDisti
shashi 2014/02/13 01:03:11 Sorry, I do not understand the comment, can you be
cjhopman 2014/02/13 02:48:48 I mean that I wasn't sure what happens if one page
shashi 2014/02/13 20:09:51 I am not sure about the correct behavior either, c
+ RunDistillerCallbackIfDone();
}
+
+ DCHECK(pages_to_be_distilled_.end() == pages_to_be_distilled_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap order of this comparison
shashi 2014/02/13 01:03:11 Done.
}
-void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto,
+void DistillerImpl::FetchImage(DistilledPageData* distilled_page_data,
const std::string& image_id,
const std::string& item) {
+ DCHECK(distilled_page_data);
DistillerURLFetcher* fetcher =
distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
- image_fetchers_.push_back(fetcher);
+ distilled_page_data->image_fetchers_.push_back(fetcher);
+
fetcher->FetchURL(item,
base::Bind(&DistillerImpl::OnFetchImageDone,
base::Unretained(this),
- base::Unretained(distilled_page_proto),
+ base::Unretained(distilled_page_data),
base::Unretained(fetcher),
image_id));
}
-void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto,
+void DistillerImpl::OnFetchImageDone(DistilledPageData* distilled_page_data,
DistillerURLFetcher* url_fetcher,
const std::string& id,
const std::string& response) {
- DCHECK_GT(article_proto_->pages_size(), 0);
- DCHECK(distilled_page_proto);
+ DCHECK(distilled_page_data);
+ DCHECK(distilled_page_data->proto);
DCHECK(url_fetcher);
ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
- std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher);
+ std::find(distilled_page_data->image_fetchers_.begin(),
+ distilled_page_data->image_fetchers_.end(),
+ url_fetcher);
- DCHECK(fetcher_it != image_fetchers_.end());
+ DCHECK(fetcher_it != distilled_page_data->image_fetchers_.end());
// Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
// callback is invoked by the |url_fetcher|.
- image_fetchers_.weak_erase(fetcher_it);
+ distilled_page_data->image_fetchers_.weak_erase(fetcher_it);
base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
- DistilledPageProto_Image* image = distilled_page_proto->add_image();
+
+ DistilledPageProto_Image* image = distilled_page_data->proto->add_image();
image->set_name(id);
image->set_data(response);
- RunDistillerCallbackIfDone();
+
+ CheckIfPageDone(distilled_page_data);
+}
+
+void DistillerImpl::CheckIfPageDone(
+ const DistilledPageData* distilled_page_data) {
+ DCHECK(distilled_page_data);
+ int page_no = distilled_page_data->page_no;
+ DCHECK(in_progress_pages_.end() != in_progress_pages_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap order
shashi 2014/02/13 01:03:11 Done.
shashi 2014/02/13 01:03:11 Done.
+ if (distilled_page_data->image_fetchers_.empty()) {
+ in_progress_pages_.erase(page_no);
+ RunDistillerCallbackIfDone();
+ }
}
void DistillerImpl::RunDistillerCallbackIfDone() {
- if (image_fetchers_.empty() && !distillation_in_progress_) {
+ DCHECK(!distillation_cb_.is_null());
+ if (in_progress_pages_.empty()) {
+ bool first_page = true;
+ // Stitch the pages back into the article.
+ for (std::map<int, size_t>::const_iterator it =
+ distilled_pages_index_.begin();
+ it != distilled_pages_index_.end();) {
+ const DistilledPageData* page_data = distilled_pages_[it->second];
+ *(article_proto_->add_pages()) = *(page_data->proto);
+
+ if (first_page) {
+ article_proto_->set_title(page_data->title);
+ first_page = false;
+ }
+
+ distilled_pages_index_.erase(it++);
+ }
+
+ distilled_pages_.clear();
+
+ DCHECK(distilled_pages_.empty());
+ DCHECK(distilled_pages_index_.empty());
distillation_cb_.Run(article_proto_.Pass());
+ distillation_cb_.Reset();
}
}
« components/dom_distiller/core/distiller.h ('K') | « components/dom_distiller/core/distiller.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698