chrome/browser/autocomplete/shortcuts_provider.cc - Issue 10831004: Several changes to speed up the ShortcutsProvider:

Side by Side Diff: chrome/browser/autocomplete/shortcuts_provider.cc

Issue 10831004: Several changes to speed up the ShortcutsProvider: (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/autocomplete/shortcuts_provider.h"	5 #include "chrome/browser/autocomplete/shortcuts_provider.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <cmath>	8 #include <cmath>

9 #include <map>	9 #include <map>

10 #include <vector>	10 #include <vector>

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
63 }	63 }

64	64

65 void ShortcutsProvider::Start(const AutocompleteInput& input,	65 void ShortcutsProvider::Start(const AutocompleteInput& input,

66 bool minimal_changes) {	66 bool minimal_changes) {

67 matches_.clear();	67 matches_.clear();

68	68

69 if ((input.type() == AutocompleteInput::INVALID) \|\|	69 if ((input.type() == AutocompleteInput::INVALID) \|\|

70 (input.type() == AutocompleteInput::FORCED_QUERY))	70 (input.type() == AutocompleteInput::FORCED_QUERY))

71 return;	71 return;

72	72

	73 // None of our results are applicable for best match.

	74 if (input.matches_requested() == AutocompleteInput::BEST_MATCH)

	75 return;

	76

73 if (input.text().empty())	77 if (input.text().empty())

74 return;	78 return;

75	79

76 if (!initialized_)	80 if (!initialized_)

77 return;	81 return;

78	82

79 base::TimeTicks start_time = base::TimeTicks::Now();	83 base::TimeTicks start_time = base::TimeTicks::Now();

80 GetMatches(input);	84 GetMatches(input);

81 if (input.text().length() < 6) {	85 if (input.text().length() < 6) {

82 base::TimeTicks end_time = base::TimeTicks::Now();	86 base::TimeTicks end_time = base::TimeTicks::Now();

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
143 if (!backend)	147 if (!backend)

144 return;	148 return;

145 // Get the URLs from the shortcuts database with keys that partially or	149 // Get the URLs from the shortcuts database with keys that partially or

146 // completely match the search term.	150 // completely match the search term.

147 string16 term_string(base::i18n::ToLower(input.text()));	151 string16 term_string(base::i18n::ToLower(input.text()));

148 DCHECK(!term_string.empty());	152 DCHECK(!term_string.empty());

149	153

150 for (history::ShortcutsBackend::ShortcutMap::const_iterator it =	154 for (history::ShortcutsBackend::ShortcutMap::const_iterator it =

151 FindFirstMatch(term_string, backend.get());	155 FindFirstMatch(term_string, backend.get());

152 it != backend->shortcuts_map().end() &&	156 it != backend->shortcuts_map().end() &&

153 StartsWith(it->first, term_string, true); ++it)	157 StartsWith(it->first, term_string, true); ++it) {

154 matches_.push_back(ShortcutToACMatch(input, term_string, it->second));	158 // Don't return shortcuts with zero relevance.

	159 int relevance = CalculateScore(term_string, it->second);

	160 if (relevance)

	161 matches_.push_back(ShortcutToACMatch(relevance, term_string, it->second));

	162 }

155 std::partial_sort(matches_.begin(),	163 std::partial_sort(matches_.begin(),

156 matches_.begin() +	164 matches_.begin() +

157 std::min(AutocompleteProvider::kMaxMatches, matches_.size()),	165 std::min(AutocompleteProvider::kMaxMatches, matches_.size()),

158 matches_.end(), &AutocompleteMatch::MoreRelevant);	166 matches_.end(), &AutocompleteMatch::MoreRelevant);

159 if (matches_.size() > AutocompleteProvider::kMaxMatches) {	167 if (matches_.size() > AutocompleteProvider::kMaxMatches) {

160 matches_.erase(matches_.begin() + AutocompleteProvider::kMaxMatches,	168 matches_.erase(matches_.begin() + AutocompleteProvider::kMaxMatches,

161 matches_.end());	169 matches_.end());

162 }	170 }

163 }	171 }

164	172

165 AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(	173 AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(

166 const AutocompleteInput& input,	174 int relevance,

167 const string16& term_string,	175 const string16& term_string,

168 const history::ShortcutsBackend::Shortcut& shortcut) {	176 const history::ShortcutsBackend::Shortcut& shortcut) {

169 AutocompleteMatch match(this, CalculateScore(term_string, shortcut),	177 DCHECK(!term_string.empty());

170 true, AutocompleteMatch::HISTORY_TITLE);	178 AutocompleteMatch match(this, relevance, true,

	179 AutocompleteMatch::HISTORY_TITLE);

171 match.destination_url = shortcut.url;	180 match.destination_url = shortcut.url;

172 DCHECK(match.destination_url.is_valid());	181 DCHECK(match.destination_url.is_valid());

173 match.fill_into_edit = UTF8ToUTF16(shortcut.url.spec());	182 match.fill_into_edit = UTF8ToUTF16(shortcut.url.spec());

	183 match.contents = shortcut.contents;

	184 match.contents_class = shortcut.contents_class;

	185 match.description = shortcut.description;

	186 match.description_class = shortcut.description_class;

174	187

175 match.contents = shortcut.contents;	188 // Try to mark pieces of the contents and description as matches if they

176 match.contents_class = ClassifyAllMatchesInString(term_string,	189 // appear in \|term_string\|.

177 match.contents,	190 WordMap terms_map(CreateWordMapForString(term_string));
	mrossetti 2012/07/25 22:18:36 Perhaps immediately return if terms_map is empty. Perhaps immediately return if terms_map is empty. Peter Kasting 2012/07/25 22:39:56 Good catch, done. I actually must do this lest I Show quoted text On 2012/07/25 22:18:36, mrossetti wrote: > Perhaps immediately return if terms_map is empty. Good catch, done. I actually must do this lest I violate a DCHECK in ClassifyAllMatchesInString().
178 shortcut.contents_class);	191 match.contents_class = ClassifyAllMatchesInString(term_string, terms_map,

	192 match.contents, match.contents_class);

	193 match.description_class = ClassifyAllMatchesInString(term_string, terms_map,

	194 match.description, match.description_class);

	195 return match;

	196 }

179	197

180 match.description = shortcut.description;	198 // static

181 match.description_class = ClassifyAllMatchesInString(	199 ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(

182 term_string, match.description, shortcut.description_class);	200 const string16& text) {

	201 // First, convert \|text\| to a vector of the unique words in it.

	202 WordMap word_map;

	203 base::i18n::BreakIterator word_iter(text,

	204 base::i18n::BreakIterator::BREAK_WORD);

	205 if (!word_iter.Init())

	206 return word_map;

	207 std::vector<string16> words;

	208 while (word_iter.Advance()) {

	209 if (word_iter.IsWord())

	210 words.push_back(word_iter.GetString());

	211 }

	212 if (words.empty())

	213 return word_map;

	214 std::sort(words.begin(), words.end());

	215 words.erase(std::unique(words.begin(), words.end()), words.end());

183	216

184 return match;	217 // Now create a map from (first character) to (words beginning with that

	218 // character). We insert in reverse lexicographical order and rely on the

	219 // multimap preserving insertion order for values with the same key. (This

	220 // is mandated in C++11, and part of that decision was based on a survey of

	221 // existing implementations that found that it was already true everywhere.)

	222 std::reverse(words.begin(), words.end());

	223 for (std::vector<string16>::const_iterator i(words.begin()); i != words.end();

	224 ++i)

	225 word_map.insert(std::make_pair((i)[0], i));

	226 return word_map;

185 }	227 }

186	228

187 // static	229 // static

188 ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(	230 ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(

189 const string16& find_text,	231 const string16& find_text,

	232 const WordMap& find_words,

190 const string16& text,	233 const string16& text,

191 const ACMatchClassifications& original_class) {	234 const ACMatchClassifications& original_class) {

192 DCHECK(!find_text.empty());	235 DCHECK(!find_text.empty());

	236 DCHECK(!find_words.empty());

193	237

194 base::i18n::BreakIterator term_iter(find_text,	238 // First check whether \|text\| begins with \|find_text\| and mark that whole

195 base::i18n::BreakIterator::BREAK_WORD);	239 // section as a match if so.

196 if (!term_iter.Init())

197 return original_class;

198

199 std::vector<string16> terms;

200 while (term_iter.Advance()) {

201 if (term_iter.IsWord())

202 terms.push_back(term_iter.GetString());

203 }

204 // Sort the strings so that longer strings appear after their prefixes, if

205 // any.

206 std::sort(terms.begin(), terms.end());

207

208 // Find the earliest match of any word in \|find_text\| in the \|text\|. Add to

209 // \|match_class\|. Move pointer after match. Repeat until all matches are

210 // found.

211 string16 text_lowercase(base::i18n::ToLower(text));	240 string16 text_lowercase(base::i18n::ToLower(text));

212 ACMatchClassifications match_class;	241 ACMatchClassifications match_class;

213 // \|match_class\| should start at the position 0, if the matched text start	242 size_t last_position = 0;

214 // from the position 0, this will be popped from the vector further down.	243 if (StartsWith(text_lowercase, find_text, true)) {

215 match_class.push_back(ACMatchClassification(0, ACMatchClassification::NONE));	244 match_class.push_back(

216 for (size_t last_position = 0; last_position < text_lowercase.length();) {	245 ACMatchClassification(0, ACMatchClassification::MATCH));

217 size_t match_start = text_lowercase.length();	246 last_position = find_text.length();

218 size_t match_end = last_position;	247 if (last_position < text_lowercase.length()) {
	mrossetti 2012/07/25 22:18:36 You might comment on why this check is required. You might comment on why this check is required. Peter Kasting 2012/07/25 22:39:56 OK. Show quoted text On 2012/07/25 22:18:36, mrossetti wrote: > You might comment on why this check is required. OK.
	248 match_class.push_back(

	249 ACMatchClassification(last_position, ACMatchClassification::NONE));

	250 }

	251 } else {

	252 // \|match_class\| should start at position 0. If the first matching word is

	253 // found at position 0, this will be popped from the vector further down.

	254 match_class.push_back(

	255 ACMatchClassification(0, ACMatchClassification::NONE));

	256 }

219	257

220 for (size_t i = 0; i < terms.size(); ++i) {	258 // Now, starting with \|last_position\|, check each character in

221 size_t match = text_lowercase.find(terms[i], last_position);	259 // \|text_lowercase\| to see if we have words starting with that character in

222 // Use <= in conjunction with the sort() call above so that longer strings	260 // \|find_words\|. If so, check each of them to see if they match the portion

223 // are matched in preference to their prefixes.	261 // of \|text_lowercase\| beginning with \|last_position\|. Accept the first

224 if (match != string16::npos && match <= match_start) {	262 // matching word found (which should be the longest possible match at this

225 match_start = match;	263 // location, given the construction of \|find_words\|) and add a MATCH region to

226 match_end = match + terms[i].length();	264 // \|match_class\|, moving \|last_position\| to be after the matching word. If we

	265 // found no matching words, move to the next character and repeat.

	266 while (last_position < text_lowercase.length()) {

	267 std::pair<WordMap::const_iterator, WordMap::const_iterator> range(

	268 find_words.equal_range(text_lowercase[last_position]));
	mrossetti 2012/07/25 22:18:36 Sweet! Sweet!
	269 size_t next_character = last_position + 1;

	270 for (WordMap::const_iterator i(range.first); i != range.second; ++i) {

	271 const string16& word = i->second;

	272 size_t word_end = last_position + word.length();

	273 if ((word_end <= text_lowercase.length()) &&

	274 !text_lowercase.compare(last_position, word.length(), word)) {

	275 // Collapse adjacent ranges into one.

	276 if (match_class.back().offset == last_position)

	277 match_class.pop_back();

	278

	279 AutocompleteMatch::AddLastClassificationIfNecessary(&match_class,

	280 last_position, ACMatchClassification::MATCH);

	281 if (word_end < text_lowercase.length()) {

	282 match_class.push_back(

	283 ACMatchClassification(word_end, ACMatchClassification::NONE));

	284 }

	285 last_position = word_end;

	286 break;

227 }	287 }

228 }	288 }

229	289 last_position = std::max(last_position, next_character);

230 if (match_start >= match_end)

231 break;

232

233 // Collapse adjacent ranges into one.

234 if (!match_class.empty() && match_class.back().offset == match_start)

235 match_class.pop_back();

236

237 AutocompleteMatch::AddLastClassificationIfNecessary(&match_class,

238 match_start, ACMatchClassification::MATCH);

239 if (match_end < text_lowercase.length()) {

240 AutocompleteMatch::AddLastClassificationIfNecessary(&match_class,

241 match_end, ACMatchClassification::NONE);

242 }

243

244 last_position = match_end;

245 }	290 }

246	291

247 // Merge match-marking data with original classifications.	292 // Merge match-marking data with original classifications.

248 if (match_class.empty())	293 if ((match_class.size() == 1) &&

	294 (match_class.back().style == ACMatchClassification::NONE))

249 return original_class;	295 return original_class;

250

251 ACMatchClassifications output;	296 ACMatchClassifications output;

252 for (ACMatchClassifications::const_iterator i = original_class.begin(),	297 for (ACMatchClassifications::const_iterator i = original_class.begin(),

253 j = match_class.begin(); i != original_class.end();) {	298 j = match_class.begin(); i != original_class.end();) {

254 AutocompleteMatch::AddLastClassificationIfNecessary(&output,	299 AutocompleteMatch::AddLastClassificationIfNecessary(&output,

255 std::max(i->offset, j->offset), i->style \| j->style);	300 std::max(i->offset, j->offset), i->style \| j->style);

256 const size_t next_i_offset = (i + 1) == original_class.end() ?	301 const size_t next_i_offset = (i + 1) == original_class.end() ?

257 static_cast<size_t>(-1) : (i + 1)->offset;	302 static_cast<size_t>(-1) : (i + 1)->offset;

258 const size_t next_j_offset = (j + 1) == match_class.end() ?	303 const size_t next_j_offset = (j + 1) == match_class.end() ?

259 static_cast<size_t>(-1) : (j + 1)->offset;	304 static_cast<size_t>(-1) : (j + 1)->offset;

260 if (next_i_offset >= next_j_offset)	305 if (next_i_offset >= next_j_offset)

261 ++j;	306 ++j;

262 if (next_j_offset >= next_i_offset)	307 if (next_j_offset >= next_i_offset)

263 ++i;	308 ++i;

264 }	309 }

265

266 return output;	310 return output;

267 }	311 }

268	312

269 history::ShortcutsBackend::ShortcutMap::const_iterator	313 history::ShortcutsBackend::ShortcutMap::const_iterator

270 ShortcutsProvider::FindFirstMatch(const string16& keyword,	314 ShortcutsProvider::FindFirstMatch(const string16& keyword,

271 history::ShortcutsBackend* backend) {	315 history::ShortcutsBackend* backend) {

272 DCHECK(backend);	316 DCHECK(backend);

273 history::ShortcutsBackend::ShortcutMap::const_iterator it =	317 history::ShortcutsBackend::ShortcutMap::const_iterator it =

274 backend->shortcuts_map().lower_bound(keyword);	318 backend->shortcuts_map().lower_bound(keyword);

275 // Lower bound not necessarily matches the keyword, check for item pointed by	319 // Lower bound not necessarily matches the keyword, check for item pointed by

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
308 // (1.0 / each 5 additional hits), up to a maximum of 5x as long.	352 // (1.0 / each 5 additional hits), up to a maximum of 5x as long.

309 const double kMaxDecaySpeedDivisor = 5.0;	353 const double kMaxDecaySpeedDivisor = 5.0;

310 const double kNumUsesPerDecaySpeedDivisorIncrement = 5.0;	354 const double kNumUsesPerDecaySpeedDivisorIncrement = 5.0;

311 double decay_divisor = std::min(kMaxDecaySpeedDivisor,	355 double decay_divisor = std::min(kMaxDecaySpeedDivisor,

312 (shortcut.number_of_hits + kNumUsesPerDecaySpeedDivisorIncrement - 1) /	356 (shortcut.number_of_hits + kNumUsesPerDecaySpeedDivisorIncrement - 1) /

313 kNumUsesPerDecaySpeedDivisorIncrement);	357 kNumUsesPerDecaySpeedDivisorIncrement);

314	358

315 return static_cast<int>((base_score / exp(decay_exponent / decay_divisor)) +	359 return static_cast<int>((base_score / exp(decay_exponent / decay_divisor)) +

316 0.5);	360 0.5);

317 }	361 }

OLD	NEW

« no previous file with comments | « chrome/browser/autocomplete/shortcuts_provider.h ('k') | chrome/browser/autocomplete/shortcuts_provider_unittest.cc » ('j') | no next file with comments »