OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
| 11 #include "base/location.h" |
| 12 #include "base/message_loop/message_loop.h" |
| 13 #include "base/strings/string_number_conversions.h" |
11 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
12 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
13 #include "base/values.h" | 16 #include "base/values.h" |
14 #include "components/dom_distiller/core/distiller_page.h" | 17 #include "components/dom_distiller/core/distiller_page.h" |
15 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 18 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
| 19 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
16 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 20 #include "components/dom_distiller/core/proto/distilled_page.pb.h" |
17 #include "grit/dom_distiller_resources.h" | 21 #include "grit/dom_distiller_resources.h" |
18 #include "net/url_request/url_request_context_getter.h" | 22 #include "net/url_request/url_request_context_getter.h" |
19 #include "ui/base/resource/resource_bundle.h" | 23 #include "ui/base/resource/resource_bundle.h" |
20 #include "url/gurl.h" | 24 #include "url/gurl.h" |
21 | 25 |
| 26 namespace { |
| 27 // Maximum number of distilled pages in an article. |
| 28 const int kMaxPagesInArticle = 32; |
| 29 } |
| 30 |
22 namespace dom_distiller { | 31 namespace dom_distiller { |
23 | 32 |
24 DistillerFactoryImpl::DistillerFactoryImpl( | 33 DistillerFactoryImpl::DistillerFactoryImpl( |
25 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 34 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
26 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) | 35 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) |
27 : distiller_page_factory_(distiller_page_factory.Pass()), | 36 : distiller_page_factory_(distiller_page_factory.Pass()), |
28 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} | 37 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} |
29 | 38 |
30 DistillerFactoryImpl::~DistillerFactoryImpl() {} | 39 DistillerFactoryImpl::~DistillerFactoryImpl() {} |
31 | 40 |
32 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { | 41 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
33 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( | 42 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( |
34 *distiller_page_factory_, *distiller_url_fetcher_factory_)); | 43 *distiller_page_factory_, *distiller_url_fetcher_factory_)); |
35 distiller->Init(); | 44 distiller->Init(); |
36 return distiller.PassAs<Distiller>(); | 45 return distiller.PassAs<Distiller>(); |
37 } | 46 } |
38 | 47 |
39 DistillerImpl::DistillerImpl( | 48 DistillerImpl::DistillerImpl( |
40 const DistillerPageFactory& distiller_page_factory, | 49 const DistillerPageFactory& distiller_page_factory, |
41 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) | 50 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
42 : distiller_page_factory_(distiller_page_factory), | 51 : distiller_page_factory_(distiller_page_factory), |
43 distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { | 52 distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
| 53 distillation_in_progress_(false) { |
44 distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); | 54 distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); |
45 } | 55 } |
46 | 56 |
47 DistillerImpl::~DistillerImpl() { | 57 DistillerImpl::~DistillerImpl() { |
48 } | 58 } |
49 | 59 |
50 void DistillerImpl::Init() { | 60 void DistillerImpl::Init() { |
| 61 DCHECK(!distillation_in_progress_); |
51 distiller_page_->Init(); | 62 distiller_page_->Init(); |
| 63 article_proto_.reset(new DistilledArticleProto()); |
52 } | 64 } |
53 | 65 |
54 void DistillerImpl::DistillPage(const GURL& url, | 66 void DistillerImpl::DistillPage(const GURL& url, |
55 const DistillerCallback& distillation_cb) { | 67 const DistillerCallback& distillation_cb) { |
| 68 DCHECK(!distillation_in_progress_); |
56 distillation_cb_ = distillation_cb; | 69 distillation_cb_ = distillation_cb; |
57 proto_.reset(new DistilledPageProto()); | 70 DistillNextPage(url); |
58 proto_->set_url(url.spec()); | 71 } |
| 72 |
| 73 void DistillerImpl::DistillNextPage(const GURL& url) { |
| 74 DCHECK(!distillation_in_progress_); |
| 75 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
| 76 processed_urls_.find(url.spec()) == processed_urls_.end()) { |
| 77 distillation_in_progress_ = true; |
| 78 // Distill the next page. |
| 79 base::MessageLoop::current()->PostTask( |
| 80 FROM_HERE, |
| 81 base::Bind( |
| 82 &DistillerImpl::AddAndDistillPage, base::Unretained(this), url)); |
| 83 } else { |
| 84 DistillationTaskComplete(); |
| 85 } |
| 86 } |
| 87 |
| 88 void DistillerImpl::AddAndDistillPage(const GURL& url) { |
| 89 DCHECK(distillation_in_progress_); |
| 90 DCHECK(url.is_valid()); |
| 91 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
| 92 DistilledPageProto* page_proto = article_proto_->add_pages(); |
| 93 page_proto->set_url(url.spec()); |
59 LoadURL(url); | 94 LoadURL(url); |
60 } | 95 } |
61 | 96 |
62 void DistillerImpl::LoadURL(const GURL& url) { | 97 void DistillerImpl::LoadURL(const GURL& url) { |
63 distiller_page_->LoadURL(url); | 98 distiller_page_->LoadURL(url); |
64 } | 99 } |
65 | 100 |
66 void DistillerImpl::OnLoadURLDone() { | 101 void DistillerImpl::OnLoadURLDone() { |
67 GetDistilledContent(); | 102 GetDistilledContent(); |
68 } | 103 } |
69 | 104 |
70 void DistillerImpl::GetDistilledContent() { | 105 void DistillerImpl::GetDistilledContent() { |
71 std::string script = | 106 std::string script = |
72 ResourceBundle::GetSharedInstance().GetRawDataResource( | 107 ResourceBundle::GetSharedInstance().GetRawDataResource( |
73 IDR_DISTILLER_JS).as_string(); | 108 IDR_DISTILLER_JS).as_string(); |
74 distiller_page_->ExecuteJavaScript(script); | 109 distiller_page_->ExecuteJavaScript(script); |
75 } | 110 } |
76 | 111 |
77 void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { | 112 void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { |
| 113 DCHECK(distillation_in_progress_); |
| 114 |
78 std::string result; | 115 std::string result; |
79 bool fetched_image = false; | |
80 const base::ListValue* result_list = NULL; | 116 const base::ListValue* result_list = NULL; |
81 if (!value->GetAsList(&result_list)) { | 117 if (!value->GetAsList(&result_list)) { |
82 DCHECK(proto_); | 118 distillation_in_progress_ = false; |
83 distillation_cb_.Run(proto_.Pass()); | 119 DistillationTaskComplete(); |
84 return; | 120 return; |
85 } | 121 } |
| 122 |
| 123 DistilledPageProto* current_page = GetLastPage(); |
| 124 GURL next_page_url; |
86 int i = 0; | 125 int i = 0; |
87 for (base::ListValue::const_iterator iter = result_list->begin(); | 126 for (base::ListValue::const_iterator iter = result_list->begin(); |
88 iter != result_list->end(); ++iter, ++i) { | 127 iter != result_list->end(); ++iter, ++i) { |
89 std::string item; | 128 std::string item; |
90 (*iter)->GetAsString(&item); | 129 (*iter)->GetAsString(&item); |
91 // The JavaScript returns an array where the first element is the title, | 130 // The JavaScript returns an array where the first element is the title, |
92 // the second element is the article content HTML, and the remaining | 131 // the second element is the article content HTML, the third element is |
93 // elements are image URLs referenced in the HTML. | 132 // the next page URL, and the rest are image URLs referenced in the HTML. |
94 switch (i) { | 133 switch (i) { |
95 case 0: | 134 case 0: |
96 proto_->set_title(item); | 135 // Use the title of the first page as the title of the article. |
| 136 if (article_proto_->pages_size() == 1) |
| 137 article_proto_->set_title(item); |
97 break; | 138 break; |
98 case 1: | 139 case 1: |
99 proto_->set_html(item); | 140 current_page->set_html(item); |
100 break; | 141 break; |
| 142 case 2: { |
| 143 next_page_url = GURL(item); |
| 144 if (next_page_url.is_valid()) { |
| 145 GURL current_page_url(current_page->url()); |
| 146 // The pages should be in the same origin. |
| 147 DCHECK_EQ(next_page_url.GetOrigin(), current_page_url.GetOrigin()); |
| 148 } |
| 149 break; |
| 150 } |
101 default: | 151 default: |
102 int image_number = i - 2; | 152 int page_number = article_proto_->pages_size(); |
103 std::string image_id = base::StringPrintf("%d", image_number); | 153 int image_number = i - 3; |
104 FetchImage(image_id, item); | 154 std::string image_id = base::IntToString(page_number) + "_" + |
105 fetched_image = true; | 155 base::IntToString(image_number); |
| 156 FetchImage(current_page, image_id, item); |
106 } | 157 } |
107 } | 158 } |
108 if (!fetched_image) | 159 processed_urls_.insert(current_page->url()); |
109 distillation_cb_.Run(proto_.Pass()); | 160 distillation_in_progress_ = false; |
| 161 DistillNextPage(next_page_url); |
110 } | 162 } |
111 | 163 |
112 void DistillerImpl::FetchImage(const std::string& image_id, | 164 DistilledPageProto* DistillerImpl::GetLastPage() const { |
| 165 DCHECK_GT(article_proto_->pages_size(), 0); |
| 166 int index = article_proto_->pages_size() - 1; |
| 167 DCHECK_GE(index, 0); |
| 168 return article_proto_->mutable_pages(index); |
| 169 } |
| 170 |
| 171 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
| 172 const std::string& image_id, |
113 const std::string& item) { | 173 const std::string& item) { |
114 DistillerURLFetcher* fetcher = | 174 DistillerURLFetcher* fetcher = |
115 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 175 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
116 image_fetchers_[image_id] = fetcher; | 176 image_fetchers_[image_id] = fetcher; |
117 fetcher->FetchURL(item, | 177 fetcher->FetchURL(item, |
118 base::Bind(&DistillerImpl::OnFetchImageDone, | 178 base::Bind(&DistillerImpl::OnFetchImageDone, |
119 base::Unretained(this), image_id)); | 179 base::Unretained(this), |
| 180 base::Unretained(distilled_page_proto), |
| 181 image_id)); |
120 } | 182 } |
121 | 183 |
122 void DistillerImpl::OnFetchImageDone(const std::string& id, | 184 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
| 185 const std::string& id, |
123 const std::string& response) { | 186 const std::string& response) { |
124 DCHECK(proto_); | 187 DCHECK_GT(article_proto_->pages_size(), 0); |
125 DistilledPageProto_Image* image = proto_->add_image(); | 188 DCHECK(distilled_page_proto); |
| 189 DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
126 image->set_name(id); | 190 image->set_name(id); |
127 image->set_data(response); | 191 image->set_data(response); |
128 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); | 192 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
129 DistillerURLFetcher* fetcher = image_fetchers_[id]; | 193 DistillerURLFetcher* fetcher = image_fetchers_[id]; |
130 int result = image_fetchers_.erase(id); | 194 int result = image_fetchers_.erase(id); |
131 delete fetcher; | 195 delete fetcher; |
132 DCHECK_EQ(1, result); | 196 DCHECK_EQ(1, result); |
133 if (image_fetchers_.empty()) { | 197 DistillationTaskComplete(); |
134 distillation_cb_.Run(proto_.Pass()); | 198 } |
| 199 |
| 200 void DistillerImpl::DistillationTaskComplete() { |
| 201 if (image_fetchers_.empty() && !distillation_in_progress_) { |
| 202 distillation_cb_.Run(article_proto_.Pass()); |
135 } | 203 } |
136 } | 204 } |
137 | 205 |
138 } // namespace dom_distiller | 206 } // namespace dom_distiller |
OLD | NEW |