OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
11 #include "base/strings/stringprintf.h" | 11 #include "base/location.h" |
12 #include "base/message_loop/message_loop.h" | |
13 #include "base/strings/string_number_conversions.h" | |
12 #include "base/strings/utf_string_conversions.h" | 14 #include "base/strings/utf_string_conversions.h" |
13 #include "base/values.h" | 15 #include "base/values.h" |
14 #include "components/dom_distiller/core/distiller_page.h" | 16 #include "components/dom_distiller/core/distiller_page.h" |
15 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 17 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
18 #include "components/dom_distiller/core/proto/distilled_article.pb.h" | |
16 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 19 #include "components/dom_distiller/core/proto/distilled_page.pb.h" |
17 #include "grit/dom_distiller_resources.h" | |
18 #include "net/url_request/url_request_context_getter.h" | 20 #include "net/url_request/url_request_context_getter.h" |
19 #include "ui/base/resource/resource_bundle.h" | 21 |
20 #include "url/gurl.h" | 22 namespace { |
23 // Maximum number of distilled pages in an article. | |
cjhopman
2014/02/03 21:47:22
s/ a / an /
shashi
2014/02/03 23:19:29
Done.
| |
24 const int kMaxPagesInArticle = 32; | |
25 } | |
21 | 26 |
22 namespace dom_distiller { | 27 namespace dom_distiller { |
23 | 28 |
24 DistillerFactoryImpl::DistillerFactoryImpl( | 29 DistillerFactoryImpl::DistillerFactoryImpl( |
25 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 30 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
26 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) | 31 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) |
27 : distiller_page_factory_(distiller_page_factory.Pass()), | 32 : distiller_page_factory_(distiller_page_factory.Pass()), |
28 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} | 33 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} |
29 | 34 |
30 DistillerFactoryImpl::~DistillerFactoryImpl() {} | 35 DistillerFactoryImpl::~DistillerFactoryImpl() {} |
31 | 36 |
32 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { | 37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
33 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( | 38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( |
34 *distiller_page_factory_, *distiller_url_fetcher_factory_)); | 39 *distiller_page_factory_, *distiller_url_fetcher_factory_)); |
35 distiller->Init(); | 40 distiller->Init(); |
36 return distiller.PassAs<Distiller>(); | 41 return distiller.PassAs<Distiller>(); |
37 } | 42 } |
38 | 43 |
39 DistillerImpl::DistillerImpl( | 44 DistillerImpl::DistillerImpl( |
40 const DistillerPageFactory& distiller_page_factory, | 45 const DistillerPageFactory& distiller_page_factory, |
41 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) | 46 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
42 : distiller_page_factory_(distiller_page_factory), | 47 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
43 distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { | 48 distillation_in_progress_(false) { |
44 distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); | 49 page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
45 } | 50 } |
46 | 51 |
47 DistillerImpl::~DistillerImpl() { | 52 DistillerImpl::~DistillerImpl() { |
48 } | 53 } |
49 | 54 |
50 void DistillerImpl::Init() { | 55 void DistillerImpl::Init() { |
51 distiller_page_->Init(); | 56 DCHECK(!distillation_in_progress_); |
57 page_distiller_->Init(); | |
58 article_proto_.reset(new DistilledArticleProto()); | |
52 } | 59 } |
53 | 60 |
54 void DistillerImpl::DistillPage(const GURL& url, | 61 void DistillerImpl::DistillPage(const GURL& url, |
55 const DistillerCallback& distillation_cb) { | 62 const DistillerCallback& distillation_cb) { |
63 DCHECK(!distillation_in_progress_); | |
56 distillation_cb_ = distillation_cb; | 64 distillation_cb_ = distillation_cb; |
57 proto_.reset(new DistilledPageProto()); | 65 DistillPage(url); |
58 proto_->set_url(url.spec()); | |
59 LoadURL(url); | |
60 } | 66 } |
61 | 67 |
62 void DistillerImpl::LoadURL(const GURL& url) { | 68 void DistillerImpl::DistillPage(const GURL& url) { |
63 distiller_page_->LoadURL(url); | 69 DCHECK(!distillation_in_progress_); |
70 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && | |
71 processed_urls_.find(url.spec()) == processed_urls_.end()) { | |
72 distillation_in_progress_ = true; | |
73 // Distill the next page. | |
74 base::MessageLoop::current()->PostTask( | |
cjhopman
2014/02/03 21:47:22
Do we need to post a task here? Can't we just call
shashi
2014/02/03 23:19:29
I was afraid that it may recurse, because OnPageDi
cjhopman
2014/02/03 23:56:53
Ah, I see now.
Now I think that we should either
shashi
2014/02/04 01:39:37
Done.
| |
75 FROM_HERE, | |
76 base::Bind( | |
77 &DistillerImpl::DistillNextPage, base::Unretained(this), url)); | |
78 } else { | |
79 CheckIfAllCallbacksAreFinished(); | |
80 } | |
64 } | 81 } |
65 | 82 |
66 void DistillerImpl::OnLoadURLDone() { | 83 void DistillerImpl::DistillNextPage(const GURL& url) { |
67 GetDistilledContent(); | 84 DCHECK(distillation_in_progress_); |
85 DCHECK(url.is_valid()); | |
86 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); | |
87 page_distiller_->DistillPage( | |
88 url, | |
89 base::Bind(&DistillerImpl::OnPageDistillationFinished, | |
90 base::Unretained(this), | |
91 url)); | |
68 } | 92 } |
69 | 93 |
70 void DistillerImpl::GetDistilledContent() { | 94 void DistillerImpl::OnPageDistillationFinished( |
71 std::string script = | 95 const GURL& page_url, |
72 ResourceBundle::GetSharedInstance().GetRawDataResource( | 96 const DistilledPageInfo& distilled_page, |
73 IDR_DISTILLER_JS).as_string(); | 97 bool distillation_successful) { |
74 distiller_page_->ExecuteJavaScript(script); | 98 DCHECK(distillation_in_progress_); |
99 if (!distillation_successful) { | |
100 CheckIfAllCallbacksAreFinished(); | |
101 } else { | |
102 DistilledPageProto* current_page = article_proto_->add_pages(); | |
103 // Set the title of the article as the title of the first page. | |
104 if (article_proto_->pages_size() == 1) { | |
105 article_proto_->set_title(distilled_page.title); | |
106 } | |
107 | |
108 current_page->set_url(page_url.spec()); | |
109 current_page->set_html(distilled_page.html); | |
110 | |
111 GURL next_page_url(distilled_page.next_page_url); | |
112 if (next_page_url.is_valid()) { | |
113 // The pages should be in same origin. | |
114 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); | |
115 } | |
116 | |
117 processed_urls_.insert(page_url.spec()); | |
118 distillation_in_progress_ = false; | |
119 int page_number = article_proto_->pages_size(); | |
120 for (size_t img_num = 0; img_num < distilled_page.image_urls.size(); | |
121 ++img_num) { | |
122 std::string image_id = | |
123 base::IntToString(page_number) + "_" + base::IntToString(img_num); | |
124 FetchImage(current_page, image_id, distilled_page.image_urls[img_num]); | |
125 } | |
126 DistillPage(next_page_url); | |
127 } | |
75 } | 128 } |
76 | 129 |
77 void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { | 130 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
78 std::string result; | 131 const std::string& image_id, |
79 bool fetched_image = false; | |
80 const base::ListValue* result_list = NULL; | |
81 if (!value->GetAsList(&result_list)) { | |
82 DCHECK(proto_); | |
83 distillation_cb_.Run(proto_.Pass()); | |
84 return; | |
85 } | |
86 int i = 0; | |
87 for (base::ListValue::const_iterator iter = result_list->begin(); | |
88 iter != result_list->end(); ++iter, ++i) { | |
89 std::string item; | |
90 (*iter)->GetAsString(&item); | |
91 // The JavaScript returns an array where the first element is the title, | |
92 // the second element is the article content HTML, and the remaining | |
93 // elements are image URLs referenced in the HTML. | |
94 switch (i) { | |
95 case 0: | |
96 proto_->set_title(item); | |
97 break; | |
98 case 1: | |
99 proto_->set_html(item); | |
100 break; | |
101 default: | |
102 int image_number = i - 2; | |
103 std::string image_id = base::StringPrintf("%d", image_number); | |
104 FetchImage(image_id, item); | |
105 fetched_image = true; | |
106 } | |
107 } | |
108 if (!fetched_image) | |
109 distillation_cb_.Run(proto_.Pass()); | |
110 } | |
111 | |
112 void DistillerImpl::FetchImage(const std::string& image_id, | |
113 const std::string& item) { | 132 const std::string& item) { |
114 DistillerURLFetcher* fetcher = | 133 DistillerURLFetcher* fetcher = |
115 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 134 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
116 image_fetchers_[image_id] = fetcher; | 135 image_fetchers_[image_id] = fetcher; |
117 fetcher->FetchURL(item, | 136 fetcher->FetchURL(item, |
118 base::Bind(&DistillerImpl::OnFetchImageDone, | 137 base::Bind(&DistillerImpl::OnFetchImageDone, |
119 base::Unretained(this), image_id)); | 138 base::Unretained(this), |
139 base::Unretained(distilled_page_proto), | |
140 image_id)); | |
120 } | 141 } |
121 | 142 |
122 void DistillerImpl::OnFetchImageDone(const std::string& id, | 143 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
144 const std::string& id, | |
123 const std::string& response) { | 145 const std::string& response) { |
124 DCHECK(proto_); | 146 DCHECK_GT(article_proto_->pages_size(), 0); |
125 DistilledPageProto_Image* image = proto_->add_image(); | 147 DCHECK(distilled_page_proto); |
148 DistilledPageProto_Image* image = distilled_page_proto->add_image(); | |
126 image->set_name(id); | 149 image->set_name(id); |
127 image->set_data(response); | 150 image->set_data(response); |
128 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); | 151 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
129 DistillerURLFetcher* fetcher = image_fetchers_[id]; | 152 DistillerURLFetcher* fetcher = image_fetchers_[id]; |
130 int result = image_fetchers_.erase(id); | 153 int result = image_fetchers_.erase(id); |
131 delete fetcher; | 154 delete fetcher; |
cjhopman
2014/02/03 21:47:22
It looks like there is a lot going on in this clas
shashi
2014/02/03 23:19:29
Done, filed: http://crbug.com/340431
On 2014/02/0
| |
132 DCHECK_EQ(1, result); | 155 DCHECK_EQ(1, result); |
133 if (image_fetchers_.empty()) { | 156 CheckIfAllCallbacksAreFinished(); |
134 distillation_cb_.Run(proto_.Pass()); | 157 } |
158 | |
159 void DistillerImpl::CheckIfAllCallbacksAreFinished() { | |
cjhopman
2014/02/03 21:47:22
I don't like this function name. I would expect th
shashi
2014/02/03 23:19:29
Done.
| |
160 if (image_fetchers_.empty() && !distillation_in_progress_) { | |
161 distillation_cb_.Run(article_proto_.Pass()); | |
135 } | 162 } |
136 } | 163 } |
137 | 164 |
138 } // namespace dom_distiller | 165 } // namespace dom_distiller |
OLD | NEW |