Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(695)

Side by Side Diff: components/dom_distiller/core/distiller.cc

Issue 178303004: Add incremental updates for multipage distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/distiller.h" 5 #include "components/dom_distiller/core/distiller.h"
6 6
7 #include <map> 7 #include <map>
8 #include <vector>
8 9
9 #include "base/bind.h" 10 #include "base/bind.h"
10 #include "base/callback.h" 11 #include "base/callback.h"
11 #include "base/location.h" 12 #include "base/location.h"
12 #include "base/message_loop/message_loop.h" 13 #include "base/message_loop/message_loop.h"
13 #include "base/strings/string_number_conversions.h" 14 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
15 #include "base/values.h" 16 #include "base/values.h"
16 #include "components/dom_distiller/core/distiller_page.h" 17 #include "components/dom_distiller/core/distiller_page.h"
17 #include "components/dom_distiller/core/distiller_url_fetcher.h" 18 #include "components/dom_distiller/core/distiller_url_fetcher.h"
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 90
// GetPageAtIndex: returns the DistilledPageData pointer stored at |index| in
// |pages_|. It DCHECKs that |index| is within |pages_| and that the stored
// entry is non-null. The function body is the same on both sides of the diff.
90 DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index) 91 DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index)
91 const { 92 const {
92 DCHECK_LT(index, pages_.size()); 93 DCHECK_LT(index, pages_.size());
93 DistilledPageData* page_data = pages_[index]; 94 DistilledPageData* page_data = pages_[index];
94 DCHECK(page_data); 95 DCHECK(page_data);
95 return page_data; 96 return page_data;
96 } 97 }
97 98
// DistillPage: entry point that starts distilling the article at |url|.
// Old side: takes a single |distillation_cb| and stores it in
// |distillation_cb_|.
// New side: takes two callbacks, |finished_cb| and |update_cb|, and stores
// them in |finished_cb_| and |update_cb_|.
// Both sides DCHECK AreAllPagesFinished() before starting.
98 void DistillerImpl::DistillPage(const GURL& url, 99 void DistillerImpl::DistillPage(const GURL& url,
99 const DistillerCallback& distillation_cb) { 100 const DistillationFinishedCallback& finished_cb,
101 const DistillationUpdateCallback& update_cb) {
100 DCHECK(AreAllPagesFinished()); 102 DCHECK(AreAllPagesFinished());
101 distillation_cb_ = distillation_cb; 103 finished_cb_ = finished_cb;
104 update_cb_ = update_cb;
102 105
// The starting URL is queued as page number 0, then distillation is kicked off.
103 AddToDistillationQueue(0, url); 106 AddToDistillationQueue(0, url);
104 DistillNextPage(); 107 DistillNextPage();
105 } 108 }
106 109
107 void DistillerImpl::DistillNextPage() { 110 void DistillerImpl::DistillNextPage() {
108 if (!waiting_pages_.empty()) { 111 if (!waiting_pages_.empty()) {
109 std::map<int, GURL>::iterator front = waiting_pages_.begin(); 112 std::map<int, GURL>::iterator front = waiting_pages_.begin();
110 int page_num = front->first; 113 int page_num = front->first;
111 const GURL url = front->second; 114 const GURL url = front->second;
(...skipping 17 matching lines...) Expand all
129 void DistillerImpl::OnPageDistillationFinished( 132 void DistillerImpl::OnPageDistillationFinished(
130 int page_num, 133 int page_num,
131 const GURL& page_url, 134 const GURL& page_url,
132 scoped_ptr<DistilledPageInfo> distilled_page, 135 scoped_ptr<DistilledPageInfo> distilled_page,
133 bool distillation_successful) { 136 bool distillation_successful) {
134 DCHECK(distilled_page.get()); 137 DCHECK(distilled_page.get());
135 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 138 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
136 if (distillation_successful) { 139 if (distillation_successful) {
137 DistilledPageData* page_data = 140 DistilledPageData* page_data =
138 GetPageAtIndex(started_pages_index_[page_num]); 141 GetPageAtIndex(started_pages_index_[page_num]);
139 DistilledPageProto* current_page = new DistilledPageProto(); 142 page_data->distilled_page_proto =
140 page_data->proto.reset(current_page); 143 new base::RefCountedData<DistilledPageProto>();
141 page_data->page_num = page_num; 144 page_data->page_num = page_num;
142 page_data->title = distilled_page->title; 145 page_data->title = distilled_page->title;
143 146
144 current_page->set_url(page_url.spec()); 147 page_data->distilled_page_proto->data.set_url(page_url.spec());
145 current_page->set_html(distilled_page->html); 148 page_data->distilled_page_proto->data.set_html(distilled_page->html);
146 149
147 GURL next_page_url(distilled_page->next_page_url); 150 GURL next_page_url(distilled_page->next_page_url);
148 if (next_page_url.is_valid()) { 151 if (next_page_url.is_valid()) {
149 // The pages should be in same origin. 152 // The pages should be in same origin.
150 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); 153 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
151 AddToDistillationQueue(page_num + 1, next_page_url); 154 AddToDistillationQueue(page_num + 1, next_page_url);
152 } 155 }
153 156
154 GURL prev_page_url(distilled_page->prev_page_url); 157 GURL prev_page_url(distilled_page->prev_page_url);
155 if (prev_page_url.is_valid()) { 158 if (prev_page_url.is_valid()) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 base::Unretained(fetcher), 191 base::Unretained(fetcher),
189 image_id)); 192 image_id));
190 } 193 }
191 194
// OnFetchImageDone: handles completion of one image fetch for page |page_num|.
// It looks up the page's data through |started_pages_index_|, removes
// |url_fetcher| from the page's |image_fetchers_|, appends an image entry
// (name = |id|, data = |response|) to the page's proto, and then calls
// AddPageIfDone(|page_num|).
// The only old/new difference is how the proto is reached:
// |page_data->proto| (old) versus |page_data->distilled_page_proto->data| (new).
192 void DistillerImpl::OnFetchImageDone(int page_num, 195 void DistillerImpl::OnFetchImageDone(int page_num,
193 DistillerURLFetcher* url_fetcher, 196 DistillerURLFetcher* url_fetcher,
194 const std::string& id, 197 const std::string& id,
195 const std::string& response) { 198 const std::string& response) {
196 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 199 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
197 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); 200 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
198 DCHECK(page_data->proto); 201 DCHECK(page_data->distilled_page_proto);
199 DCHECK(url_fetcher);
200 ScopedVector<DistillerURLFetcher>::iterator fetcher_it = 203 ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
201 std::find(page_data->image_fetchers_.begin(), 204 std::find(page_data->image_fetchers_.begin(),
202 page_data->image_fetchers_.end(), 205 page_data->image_fetchers_.end(),
203 url_fetcher); 206 url_fetcher);
204 207
205 DCHECK(fetcher_it != page_data->image_fetchers_.end()); 208 DCHECK(fetcher_it != page_data->image_fetchers_.end());
206 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone 209 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
207 // callback is invoked by the |url_fetcher|. 210 // callback is invoked by the |url_fetcher|.
// NOTE(review): weak_erase presumably drops the entry from the ScopedVector
// without deleting the object, leaving deletion to DeleteSoon below; confirm.
208 page_data->image_fetchers_.weak_erase(fetcher_it); 211 page_data->image_fetchers_.weak_erase(fetcher_it);
209 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); 212 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
210 213
211 DistilledPageProto_Image* image = page_data->proto->add_image(); 214 DistilledPageProto_Image* image =
215 page_data->distilled_page_proto->data.add_image();
212 image->set_name(id); 216 image->set_name(id);
213 image->set_data(response); 217 image->set_data(response);
214 218
// With this fetcher removed, check whether the page has become finished.
215 AddPageIfDone(page_num); 219 AddPageIfDone(page_num);
216 } 220 }
217 221
// AddPageIfDone: marks page |page_num| finished once its |image_fetchers_|
// list is empty, by moving its entry from |started_pages_index_| to
// |finished_pages_index_|. It DCHECKs that the page is currently started and
// not already finished.
// New side only: after the move it builds an ArticleDistillationUpdate via
// CreateDistillationUpdate() and passes it to |update_cb_|.
// Both sides then call RunDistillerCallbackIfDone().
218 void DistillerImpl::AddPageIfDone(int page_num) { 222 void DistillerImpl::AddPageIfDone(int page_num) {
219 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 223 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
220 DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end()); 224 DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
221 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); 225 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
222 if (page_data->image_fetchers_.empty()) { 226 if (page_data->image_fetchers_.empty()) {
223 finished_pages_index_[page_num] = started_pages_index_[page_num]; 227 finished_pages_index_[page_num] = started_pages_index_[page_num];
224 started_pages_index_.erase(page_num); 228 started_pages_index_.erase(page_num);
// The const reference below binds to the temporary returned by value from
// CreateDistillationUpdate(); the temporary lives until the end of this scope.
229 const ArticleDistillationUpdate& article_update =
230 CreateDistillationUpdate();
231 DCHECK_EQ(article_update.GetPagesSize(), finished_pages_index_.size());
// NOTE(review): |update_cb_| is run with no null check visible in this block;
// confirm every caller of DistillPage supplies a non-null update callback.
232 update_cb_.Run(article_update);
225 RunDistillerCallbackIfDone(); 233 RunDistillerCallbackIfDone();
226 } 234 }
227 } 235 }
228 236
// CreateDistillationUpdate (added on the new side only): builds an
// ArticleDistillationUpdate from the pages currently in
// |finished_pages_index_|.
// - |has_prev_page| is IsPageNumberInUse(lowest finished page number - 1).
// - |has_next_page| is IsPageNumberInUse(highest finished page number + 1).
// Both stay false when no page has finished yet.
// Pages are appended by iterating the std::map, i.e. in ascending page-number
// order.
237 const ArticleDistillationUpdate DistillerImpl::CreateDistillationUpdate()
238 const {
239 bool has_prev_page = false;
240 bool has_next_page = false;
241 if (!finished_pages_index_.empty()) {
242 int prev_page_num = finished_pages_index_.begin()->first - 1;
243 int next_page_num = finished_pages_index_.rbegin()->first + 1;
244 has_prev_page = IsPageNumberInUse(prev_page_num);
245 has_next_page = IsPageNumberInUse(next_page_num);
246 }
247
248 std::vector<scoped_refptr<ArticleDistillationUpdate::RefCountedPageProto> >
249 update_pages;
250 for (std::map<int, size_t>::const_iterator it = finished_pages_index_.begin();
251 it != finished_pages_index_.end();
252 ++it) {
// Each map value is an index into |pages_|; the page's ref-counted proto
// pointer is pushed, not a copy of the proto contents.
253 update_pages.push_back(pages_[it->second]->distilled_page_proto);
254 }
// NOTE(review): the arguments are passed as (pages, has_next_page,
// has_prev_page), next before prev; confirm this matches the constructor's
// parameter order in the ArticleDistillationUpdate declaration.
255 return ArticleDistillationUpdate(update_pages, has_next_page, has_prev_page);
256 }
257
// RunDistillerCallbackIfDone: once AreAllPagesFinished() is true, stitches all
// finished pages into one DistilledArticleProto, clears the per-article state
// (|finished_pages_index_| and |pages_|), runs the completion callback with
// the article, and resets that callback.
// Old/new differences: the callback member is |distillation_cb_| (old) versus
// |finished_cb_| (new), and each page's proto is read from |*page_data->proto|
// (old) versus |page_data->distilled_page_proto->data| (new).
229 void DistillerImpl::RunDistillerCallbackIfDone() { 258 void DistillerImpl::RunDistillerCallbackIfDone() {
230 DCHECK(!distillation_cb_.is_null()); 259 DCHECK(!finished_cb_.is_null());
231 if (AreAllPagesFinished()) { 260 if (AreAllPagesFinished()) {
232 bool first_page = true; 261 bool first_page = true;
233 scoped_ptr<DistilledArticleProto> article_proto( 262 scoped_ptr<DistilledArticleProto> article_proto(
234 new DistilledArticleProto()); 263 new DistilledArticleProto());
235 // Stitch the pages back into the article. 264 // Stitch the pages back into the article.
// The loop header has no increment: the iterator is advanced by the
// post-increment inside erase() at the bottom of the loop body.
236 for (std::map<int, size_t>::iterator it = finished_pages_index_.begin(); 265 for (std::map<int, size_t>::iterator it = finished_pages_index_.begin();
237 it != finished_pages_index_.end();) { 266 it != finished_pages_index_.end();) {
238 DistilledPageData* page_data = GetPageAtIndex(it->second); 267 DistilledPageData* page_data = GetPageAtIndex(it->second);
// The page proto is copied by value into the article.
239 *(article_proto->add_pages()) = *(page_data->proto); 268 *(article_proto->add_pages()) = page_data->distilled_page_proto->data;
240 269
// The article title is taken from the first page in map order (the lowest
// page number).
241 if (first_page) { 270 if (first_page) {
242 article_proto->set_title(page_data->title); 271 article_proto->set_title(page_data->title);
243 first_page = false; 272 first_page = false;
244 } 273 }
245 274
// erase(it++) advances |it| before the erased element is invalidated.
246 finished_pages_index_.erase(it++); 275 finished_pages_index_.erase(it++);
247 } 276 }
248 277
249 pages_.clear(); 278 pages_.clear();
250 DCHECK_LE(static_cast<size_t>(article_proto->pages_size()), 279 DCHECK_LE(static_cast<size_t>(article_proto->pages_size()),
251 max_pages_in_article_); 280 max_pages_in_article_);
252 281
253 DCHECK(pages_.empty()); 282 DCHECK(pages_.empty());
254 DCHECK(finished_pages_index_.empty()); 283 DCHECK(finished_pages_index_.empty());
// NOTE(review): on the new side only |finished_cb_| is reset here;
// |update_cb_| is left set. Confirm that is intended.
255 distillation_cb_.Run(article_proto.Pass()); 284 finished_cb_.Run(article_proto.Pass());
256 distillation_cb_.Reset(); 285 finished_cb_.Reset();
257 } 286 }
258 } 287 }
259 288
260 } // namespace dom_distiller 289 } // namespace dom_distiller
OLDNEW
« no previous file with comments | « components/dom_distiller/core/distiller.h ('k') | components/dom_distiller/core/distiller_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698