OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
11 #include "base/strings/stringprintf.h" | 11 #include "base/location.h" |
| 12 #include "base/message_loop/message_loop.h" |
| 13 #include "base/strings/string_number_conversions.h" |
12 #include "base/strings/utf_string_conversions.h" | 14 #include "base/strings/utf_string_conversions.h" |
13 #include "base/values.h" | 15 #include "base/values.h" |
14 #include "components/dom_distiller/core/distiller_page.h" | 16 #include "components/dom_distiller/core/distiller_page.h" |
15 #include "components/dom_distiller/core/distiller_url_fetcher.h" | 17 #include "components/dom_distiller/core/distiller_url_fetcher.h" |
| 18 #include "components/dom_distiller/core/proto/distilled_article.pb.h" |
16 #include "components/dom_distiller/core/proto/distilled_page.pb.h" | 19 #include "components/dom_distiller/core/proto/distilled_page.pb.h" |
17 #include "grit/dom_distiller_resources.h" | |
18 #include "net/url_request/url_request_context_getter.h" | 20 #include "net/url_request/url_request_context_getter.h" |
19 #include "ui/base/resource/resource_bundle.h" | 21 |
20 #include "url/gurl.h" | 22 namespace { |
| 23 // Maximum number of distilled pages in an article. |
| 24 const int kMaxPagesInArticle = 32; |
| 25 } |
21 | 26 |
22 namespace dom_distiller { | 27 namespace dom_distiller { |
23 | 28 |
24 DistillerFactoryImpl::DistillerFactoryImpl( | 29 DistillerFactoryImpl::DistillerFactoryImpl( |
25 scoped_ptr<DistillerPageFactory> distiller_page_factory, | 30 scoped_ptr<DistillerPageFactory> distiller_page_factory, |
26 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) | 31 scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory) |
27 : distiller_page_factory_(distiller_page_factory.Pass()), | 32 : distiller_page_factory_(distiller_page_factory.Pass()), |
28 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} | 33 distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()) {} |
29 | 34 |
30 DistillerFactoryImpl::~DistillerFactoryImpl() {} | 35 DistillerFactoryImpl::~DistillerFactoryImpl() {} |
31 | 36 |
32 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { | 37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
33 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( | 38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( |
34 *distiller_page_factory_, *distiller_url_fetcher_factory_)); | 39 *distiller_page_factory_, *distiller_url_fetcher_factory_)); |
35 distiller->Init(); | 40 distiller->Init(); |
36 return distiller.PassAs<Distiller>(); | 41 return distiller.PassAs<Distiller>(); |
37 } | 42 } |
38 | 43 |
39 DistillerImpl::DistillerImpl( | 44 DistillerImpl::DistillerImpl( |
40 const DistillerPageFactory& distiller_page_factory, | 45 const DistillerPageFactory& distiller_page_factory, |
41 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) | 46 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
42 : distiller_page_factory_(distiller_page_factory), | 47 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), |
43 distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { | 48 distillation_in_progress_(false) { |
44 distiller_page_ = distiller_page_factory_.CreateDistillerPage(this).Pass(); | 49 page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
45 } | 50 } |
46 | 51 |
47 DistillerImpl::~DistillerImpl() { | 52 DistillerImpl::~DistillerImpl() { |
48 } | 53 } |
49 | 54 |
50 void DistillerImpl::Init() { | 55 void DistillerImpl::Init() { |
51 distiller_page_->Init(); | 56 DCHECK(!distillation_in_progress_); |
| 57 page_distiller_->Init(); |
| 58 article_proto_.reset(new DistilledArticleProto()); |
52 } | 59 } |
53 | 60 |
54 void DistillerImpl::DistillPage(const GURL& url, | 61 void DistillerImpl::DistillPage(const GURL& url, |
55 const DistillerCallback& distillation_cb) { | 62 const DistillerCallback& distillation_cb) { |
| 63 DCHECK(!distillation_in_progress_); |
56 distillation_cb_ = distillation_cb; | 64 distillation_cb_ = distillation_cb; |
57 proto_.reset(new DistilledPageProto()); | 65 DistillPage(url); |
58 proto_->set_url(url.spec()); | |
59 LoadURL(url); | |
60 } | 66 } |
61 | 67 |
62 void DistillerImpl::LoadURL(const GURL& url) { | 68 void DistillerImpl::DistillPage(const GURL& url) { |
63 distiller_page_->LoadURL(url); | 69 DCHECK(!distillation_in_progress_); |
| 70 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && |
| 71 std::find(processed_urls_.begin(), processed_urls_.end(), url.spec()) == |
| 72 processed_urls_.end()) { |
| 73 distillation_in_progress_ = true; |
| 74 // Distill the next page. |
| 75 DCHECK(url.is_valid()); |
| 76 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
| 77 page_distiller_->DistillPage( |
| 78 url, |
| 79 base::Bind(&DistillerImpl::OnPageDistillationFinished, |
| 80 base::Unretained(this), |
| 81 url)); |
| 82 } else { |
| 83 RunDistillerCallbackIfDone(); |
| 84 } |
64 } | 85 } |
65 | 86 |
66 void DistillerImpl::OnLoadURLDone() { | 87 void DistillerImpl::OnPageDistillationFinished( |
67 GetDistilledContent(); | 88 const GURL& page_url, |
| 89 const DistilledPageInfo& distilled_page, |
| 90 bool distillation_successful) { |
| 91 DCHECK(distillation_in_progress_); |
| 92 if (!distillation_successful) { |
| 93 RunDistillerCallbackIfDone(); |
| 94 } else { |
| 95 DistilledPageProto* current_page = article_proto_->add_pages(); |
| 96 // Set the title of the article as the title of the first page. |
| 97 if (article_proto_->pages_size() == 1) { |
| 98 article_proto_->set_title(distilled_page.title); |
| 99 } |
| 100 |
| 101 current_page->set_url(page_url.spec()); |
| 102 current_page->set_html(distilled_page.html); |
| 103 |
| 104 GURL next_page_url(distilled_page.next_page_url); |
| 105 if (next_page_url.is_valid()) { |
| 106 // The pages should be in same origin. |
| 107 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
| 108 } |
| 109 |
| 110 processed_urls_.push_back(page_url.spec()); |
| 111 distillation_in_progress_ = false; |
| 112 int page_number = article_proto_->pages_size(); |
| 113 for (size_t img_num = 0; img_num < distilled_page.image_urls.size(); |
| 114 ++img_num) { |
| 115 std::string image_id = |
| 116 base::IntToString(page_number) + "_" + base::IntToString(img_num); |
| 117 FetchImage(current_page, image_id, distilled_page.image_urls[img_num]); |
| 118 } |
| 119 DistillPage(next_page_url); |
| 120 } |
68 } | 121 } |
69 | 122 |
70 void DistillerImpl::GetDistilledContent() { | 123 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, |
71 std::string script = | 124 const std::string& image_id, |
72 ResourceBundle::GetSharedInstance().GetRawDataResource( | |
73 IDR_DISTILLER_JS).as_string(); | |
74 distiller_page_->ExecuteJavaScript(script); | |
75 } | |
76 | |
77 void DistillerImpl::OnExecuteJavaScriptDone(const base::Value* value) { | |
78 std::string result; | |
79 bool fetched_image = false; | |
80 const base::ListValue* result_list = NULL; | |
81 if (!value->GetAsList(&result_list)) { | |
82 DCHECK(proto_); | |
83 distillation_cb_.Run(proto_.Pass()); | |
84 return; | |
85 } | |
86 int i = 0; | |
87 for (base::ListValue::const_iterator iter = result_list->begin(); | |
88 iter != result_list->end(); ++iter, ++i) { | |
89 std::string item; | |
90 (*iter)->GetAsString(&item); | |
91 // The JavaScript returns an array where the first element is the title, | |
92 // the second element is the article content HTML, and the remaining | |
93 // elements are image URLs referenced in the HTML. | |
94 switch (i) { | |
95 case 0: | |
96 proto_->set_title(item); | |
97 break; | |
98 case 1: | |
99 proto_->set_html(item); | |
100 break; | |
101 default: | |
102 int image_number = i - 2; | |
103 std::string image_id = base::StringPrintf("%d", image_number); | |
104 FetchImage(image_id, item); | |
105 fetched_image = true; | |
106 } | |
107 } | |
108 if (!fetched_image) | |
109 distillation_cb_.Run(proto_.Pass()); | |
110 } | |
111 | |
112 void DistillerImpl::FetchImage(const std::string& image_id, | |
113 const std::string& item) { | 125 const std::string& item) { |
114 DistillerURLFetcher* fetcher = | 126 DistillerURLFetcher* fetcher = |
115 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 127 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
116 image_fetchers_[image_id] = fetcher; | 128 image_fetchers_[image_id] = fetcher; |
117 fetcher->FetchURL(item, | 129 fetcher->FetchURL(item, |
118 base::Bind(&DistillerImpl::OnFetchImageDone, | 130 base::Bind(&DistillerImpl::OnFetchImageDone, |
119 base::Unretained(this), image_id)); | 131 base::Unretained(this), |
| 132 base::Unretained(distilled_page_proto), |
| 133 image_id)); |
120 } | 134 } |
121 | 135 |
122 void DistillerImpl::OnFetchImageDone(const std::string& id, | 136 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, |
| 137 const std::string& id, |
123 const std::string& response) { | 138 const std::string& response) { |
124 DCHECK(proto_); | 139 DCHECK_GT(article_proto_->pages_size(), 0); |
125 DistilledPageProto_Image* image = proto_->add_image(); | 140 DCHECK(distilled_page_proto); |
| 141 DistilledPageProto_Image* image = distilled_page_proto->add_image(); |
126 image->set_name(id); | 142 image->set_name(id); |
127 image->set_data(response); | 143 image->set_data(response); |
128 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); | 144 DCHECK(image_fetchers_.end() != image_fetchers_.find(id)); |
129 DistillerURLFetcher* fetcher = image_fetchers_[id]; | 145 DistillerURLFetcher* fetcher = image_fetchers_[id]; |
130 int result = image_fetchers_.erase(id); | 146 int result = image_fetchers_.erase(id); |
131 delete fetcher; | 147 delete fetcher; |
132 DCHECK_EQ(1, result); | 148 DCHECK_EQ(1, result); |
133 if (image_fetchers_.empty()) { | 149 RunDistillerCallbackIfDone(); |
134 distillation_cb_.Run(proto_.Pass()); | 150 } |
| 151 |
| 152 void DistillerImpl::RunDistillerCallbackIfDone() { |
| 153 if (image_fetchers_.empty() && !distillation_in_progress_) { |
| 154 distillation_cb_.Run(article_proto_.Pass()); |
135 } | 155 } |
136 } | 156 } |
137 | 157 |
138 } // namespace dom_distiller | 158 } // namespace dom_distiller |
OLD | NEW |