OLD | NEW |
| (Empty) |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library dart_style.src.line_splitter; | |
6 | |
7 import 'package:collection/priority_queue.dart'; | |
8 | |
9 import 'chunk.dart'; | |
10 import 'debug.dart' as debug; | |
11 import 'line_writer.dart'; | |
12 import 'rule/rule.dart'; | |
13 import 'rule_set.dart'; | |
14 | |
15 /// To ensure the solver doesn't go totally pathological on giant code, we cap | |
16 /// it at a fixed number of attempts. | |
17 /// | |
18 /// If the optimal solution isn't found after this many tries, it just uses the | |
19 /// best it found so far. | |
20 const _maxAttempts = 50000; | |
21 | |
22 /// Takes a set of chunks and determines the best values for its rules in order | |
23 /// to fit it inside the page boundary. | |
24 /// | |
25 /// This problem is exponential in the number of rules and a single expression | |
26 /// in Dart can be quite large, so it isn't feasible to brute force this. For | |
27 /// example: | |
28 /// | |
29 /// outer( | |
30 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
31 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
32 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
33 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8)); | |
34 /// | |
35 /// There are 509,607,936 ways this can be split. | |
36 /// | |
37 /// The problem is even harder because we may not be able to easily tell if a | |
38 /// given solution is the best one. It's possible that there is *no* solution | |
39 /// that fits in the page (due to long strings or identifiers) so the winning | |
40 /// solution may still have overflow characters. This makes it hard to know | |
41 /// when we are done and can stop looking. | |
42 /// | |
43 /// There are a couple of pieces of domain knowledge we use to cope with this: | |
44 /// | |
45 /// - Changing a rule from unsplit to split will never lower its cost. A | |
46 /// solution with all rules unsplit will always be the one with the lowest | |
47 /// cost (zero). Conversely, setting all of its rules to the maximum split | |
48 /// value will always have the highest cost. | |
49 /// | |
50 /// (You might think there is a converse rule about overflow characters. The | |
51 /// solution with the fewest splits will have the most overflow, and the | |
52 /// solution with the most splits will have the least overflow. Alas, because | |
53 /// of indentation, that isn't always the case. Adding a split may *increase* | |
54 /// overflow in some cases.) | |
55 /// | |
56 /// - If all of the chunks for a rule are inside lines that already fit in the | |
57 /// page, then splitting that rule will never improve the solution. | |
58 /// | |
59 /// We start off with a [SolveState] where all rules are unbound (which | |
60 /// implicitly treats them as unsplit). For a given solve state, we can produce | |
61 /// a set of expanded states that takes some of the rules in the first long | |
62 /// line and bind them to split values. This always produces new solve states | |
63 /// with higher cost (but often fewer overflow characters) than the parent | |
64 /// state. | |
65 /// | |
66 /// We take these expanded states and add them to a work list sorted by cost. | |
67 /// Since unsplit rules always have lower cost solutions, we know that no state | |
68 /// we enqueue later will ever have a lower cost than the ones we already have | |
69 /// enqueued. | |
70 /// | |
71 /// Then we keep pulling states off the work list and expanding them and adding | |
72 /// the results back into the list. We do this until we hit a solution where | |
73 /// all characters fit in the page. The first one we find will have the lowest | |
74 /// cost and we're done. | |
75 /// | |
76 /// We also keep running track of the best solution we've found so far that | |
77 /// has the fewest overflow characters and the lowest cost. If no solution fits, | |
78 /// we'll use this one. | |
79 /// | |
80 /// As a final escape hatch for pathologically nasty code, after trying some | |
81 /// fixed maximum number of solve states, we just bail and return the best | |
82 /// solution found so far. | |
83 /// | |
84 /// Even with the above algorithmic optimizations, complex code may still | |
85 /// require a lot of exploring to find an optimal solution. To make that fast, | |
86 /// this code is carefully profiled and optimized. If you modify this, make | |
87 /// sure to test against the benchmark to ensure you don't regress performance. | |
88 class LineSplitter { | |
89 final LineWriter _writer; | |
90 | |
91 /// The list of chunks being split. | |
92 final List<Chunk> _chunks; | |
93 | |
94 /// The set of soft rules whose values are being selected. | |
95 final List<Rule> _rules; | |
96 | |
97 /// The number of characters of additional indentation to apply to each line. | |
98 /// | |
99 /// This is used when formatting blocks to get the output into the right | |
100 /// column based on where the block appears. | |
101 final int _blockIndentation; | |
102 | |
103 /// The starting column of the first line. | |
104 final int _firstLineIndent; | |
105 | |
106 /// The list of solve states to explore further. | |
107 /// | |
108 /// This is sorted lowest-cost first. This ensures that as soon as we find a | |
109 /// solution that fits in the page, we know it will be the lowest cost one | |
110 /// and can stop looking. | |
111 final _workList = new HeapPriorityQueue<SolveState>(); | |
112 | |
113 /// The lowest cost solution found so far. | |
114 SolveState _bestSolution; | |
115 | |
116 /// Creates a new splitter for [_writer] that tries to fit [chunks] into the | |
117 /// page width. | |
118 LineSplitter(this._writer, List<Chunk> chunks, int blockIndentation, | |
119 int firstLineIndent, | |
120 {bool flushLeft: false}) | |
121 : _chunks = chunks, | |
122 // Collect the set of soft rules that we need to select values for. | |
123 _rules = chunks | |
124 .map((chunk) => chunk.rule) | |
125 .where((rule) => rule != null && rule is! HardSplitRule) | |
126 .toSet() | |
127 .toList(growable: false), | |
128 _blockIndentation = blockIndentation, | |
129 _firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation { | |
130 // Store the rule's index in the rule so we can get from a chunk to a rule | |
131 // index quickly. | |
132 for (var i = 0; i < _rules.length; i++) { | |
133 _rules[i].index = i; | |
134 } | |
135 } | |
136 | |
137 /// Determine the best way to split the chunks into lines that fit in the | |
138 /// page, if possible. | |
139 /// | |
140 /// Returns a [SplitSet] that defines where each split occurs and the | |
141 /// indentation of each line. | |
142 /// | |
143 /// [firstLineIndent] is the number of characters of whitespace to prefix the | |
144 /// first line of output with. | |
145 SplitSet apply() { | |
146 // Start with a completely unbound, unsplit solution. | |
147 _workList.add(new SolveState(this, new RuleSet(_rules.length))); | |
148 | |
149 var attempts = 0; | |
150 while (!_workList.isEmpty) { | |
151 var state = _workList.removeFirst(); | |
152 | |
153 if (state.isBetterThan(_bestSolution)) { | |
154 _bestSolution = state; | |
155 | |
156 // Since we sort solutions by cost the first solution we find that | |
157 // fits is the winner. | |
158 if (_bestSolution.overflowChars == 0) break; | |
159 } | |
160 | |
161 if (debug.traceSplitter) { | |
162 var best = state == _bestSolution ? " (best)" : ""; | |
163 debug.log("$state$best"); | |
164 debug.dumpLines(_chunks, _firstLineIndent, state.splits); | |
165 debug.log(); | |
166 } | |
167 | |
168 if (attempts++ > _maxAttempts) break; | |
169 | |
170 // Try bumping the rule values for rules whose chunks are on long lines. | |
171 state.expand(); | |
172 } | |
173 | |
174 if (debug.traceSplitter) { | |
175 debug.log("$_bestSolution (winner)"); | |
176 debug.dumpLines(_chunks, _firstLineIndent, _bestSolution.splits); | |
177 debug.log(); | |
178 } | |
179 | |
180 return _bestSolution.splits; | |
181 } | |
182 } | |
183 | |
184 /// A possibly incomplete solution in the line splitting search space. | |
185 /// | |
186 /// A single [SolveState] binds some subset of the rules to values while | |
187 /// leaving the rest unbound. If every rule is bound, the solve state describes | |
188 /// a complete solution to the line splitting problem. Even if rules are | |
189 /// unbound, a state can also usually be used as a solution by treating all | |
190 /// unbound rules as unsplit. (The usually is because a state that constrains | |
191 /// an unbound rule to split can't be used with that rule unsplit.) | |
192 /// | |
193 /// From a given solve state, we can explore the search tree to more refined | |
194 /// solve states by producing new ones that add more bound rules to the current | |
195 /// state. | |
196 class SolveState implements Comparable<SolveState> { | |
197 final LineSplitter _splitter; | |
198 final RuleSet _ruleValues; | |
199 | |
200 /// The unbound rules in this state that can be bound to produce new more | |
201 /// refined states. | |
202 /// | |
203 /// Keeping this set small is the key to make the entire line splitter | |
204 /// perform well. If we consider too make rules at each state, our | |
205 /// exploration of the solution space is too branchy and we waste time on | |
206 /// dead end solutions. | |
207 /// | |
208 /// Here is the key insight. The line splitter treats any unbound rule as | |
209 /// being unsplit. This means refining a solution always means taking a rule | |
210 /// that is unsplit and making it split. That monotonically increases the | |
211 /// cost, but may help fit the solution inside the page. | |
212 /// | |
213 /// We want to keep the cost low, so the only reason to consider making a | |
214 /// rule split is if it reduces an overflowing line. It's also the case that | |
215 /// splitting an earlier rule will often reshuffle the rest of the line. | |
216 /// | |
217 /// Taking that into account, the only rules we consider binding to extend a | |
218 /// solve state are *unbound rules inside the first line that is overflowing*. | |
219 /// Even if a line has dozens of rules, this generally keeps the branching | |
220 /// down to a few. It also means rules inside lines that already fit are | |
221 /// never touched. | |
222 /// | |
223 /// There is one other set of rules that go in here. Sometimes a bound rule | |
224 /// in the solve state constrains some other unbound rule to split. In that | |
225 /// case, we also consider that active so we know to not leave it at zero. | |
226 final _liveRules = new Set<Rule>(); | |
227 | |
228 /// The set of splits chosen for this state. | |
229 SplitSet get splits => _splits; | |
230 SplitSet _splits; | |
231 | |
232 /// The number of characters that do not fit inside the page with this set of | |
233 /// splits. | |
234 int get overflowChars => _overflowChars; | |
235 int _overflowChars; | |
236 | |
237 /// Whether we can treat this state as a complete solution by leaving its | |
238 /// unbound rules unsplit. | |
239 /// | |
240 /// This is generally true but will be false if the state contains any | |
241 /// unbound rules that are constrained to not be zero by other bound rules. | |
242 /// This avoids picking a solution that leaves those rules at zero when they | |
243 /// aren't allowed to be. | |
244 bool _isComplete = true; | |
245 | |
246 SolveState(this._splitter, this._ruleValues) { | |
247 _calculateSplits(); | |
248 _calculateCost(); | |
249 } | |
250 | |
251 /// Orders this state relative to [other]. | |
252 /// | |
253 /// This is the best-first ordering that the [LineSplitter] uses in its | |
254 /// worklist. It prefers cheaper states even if they overflow because this | |
255 /// ensures it finds the best solution first as soon as it finds one that | |
256 /// fits in the page so it can early out. | |
257 int compareTo(SolveState other) { | |
258 // TODO(rnystrom): It may be worth sorting by the estimated lowest number | |
259 // of overflow characters first. That doesn't help in cases where there is | |
260 // a solution that fits, but may help in corner cases where there is no | |
261 // fitting solution. | |
262 | |
263 if (splits.cost != other.splits.cost) { | |
264 return splits.cost.compareTo(other.splits.cost); | |
265 } | |
266 | |
267 if (overflowChars != other.overflowChars) { | |
268 return overflowChars.compareTo(other.overflowChars); | |
269 } | |
270 | |
271 // Distinguish states based on the rule values just so that states with the | |
272 // same cost range but different rule values don't get considered identical | |
273 // by HeapPriorityQueue. | |
274 for (var rule in _splitter._rules) { | |
275 var value = _ruleValues.getValue(rule); | |
276 var otherValue = other._ruleValues.getValue(rule); | |
277 | |
278 if (value != otherValue) return value.compareTo(otherValue); | |
279 } | |
280 | |
281 // If we get here, this state is identical to [other]. | |
282 return 0; | |
283 } | |
284 | |
285 /// Returns `true` if this state is a better solution to use as the final | |
286 /// result than [other]. | |
287 bool isBetterThan(SolveState other) { | |
288 // If this state contains an unbound rule that we know can't be left | |
289 // unsplit, we can't pick this as a solution. | |
290 if (!_isComplete) return false; | |
291 | |
292 // Anything is better than nothing. | |
293 if (other == null) return true; | |
294 | |
295 // Prefer the solution that fits the most in the page. | |
296 if (overflowChars != other.overflowChars) { | |
297 return overflowChars < other.overflowChars; | |
298 } | |
299 | |
300 // Otherwise, prefer the best cost. | |
301 return splits.cost < other.splits.cost; | |
302 } | |
303 | |
304 /// Enqueues more solve states to consider based on this one. | |
305 /// | |
306 /// For each unbound rule in this state that occurred in the first long line, | |
307 /// enqueue solve states that bind that rule to each value it can have and | |
308 /// bind all previous rules to zero. (In other words, try all subsolutions | |
309 /// where that rule becomes the first new rule to split at.) | |
310 void expand() { | |
311 var unsplitRules = _ruleValues.clone(); | |
312 | |
313 // Walk down the rules looking for unbound ones to try. | |
314 var triedRules = 0; | |
315 for (var rule in _splitter._rules) { | |
316 if (_liveRules.contains(rule)) { | |
317 // We found one worth trying, so try all of its values. | |
318 for (var value = 1; value < rule.numValues; value++) { | |
319 var boundRules = unsplitRules.clone(); | |
320 | |
321 var mustSplitRules; | |
322 var valid = boundRules.tryBind(_splitter._rules, rule, value, (rule) { | |
323 if (mustSplitRules == null) mustSplitRules = []; | |
324 mustSplitRules.add(rule); | |
325 }); | |
326 | |
327 // Make sure we don't violate the constraints of the bound rules. | |
328 if (!valid) continue; | |
329 | |
330 var state = new SolveState(_splitter, boundRules); | |
331 | |
332 // If some unbound rules are constrained to split, remember that. | |
333 if (mustSplitRules != null) { | |
334 state._isComplete = false; | |
335 state._liveRules.addAll(mustSplitRules); | |
336 } | |
337 | |
338 _splitter._workList.add(state); | |
339 } | |
340 | |
341 // Stop once we've tried all of the ones we can. | |
342 if (++triedRules == _liveRules.length) break; | |
343 } | |
344 | |
345 // Fill in previous unbound rules with zero. | |
346 if (!_ruleValues.contains(rule)) { | |
347 // Pass a dummy callback because zero will never fail. (If it would | |
348 // have, that rule would already be bound to some other value.) | |
349 if (!unsplitRules.tryBind(_splitter._rules, rule, 0, (_) {})) { | |
350 break; | |
351 } | |
352 } | |
353 } | |
354 } | |
355 | |
356 /// Calculates the [SplitSet] for this solve state, assuming any unbound | |
357 /// rules are set to zero. | |
358 void _calculateSplits() { | |
359 // Figure out which expression nesting levels got split and need to be | |
360 // assigned columns. | |
361 var usedNestingLevels = new Set(); | |
362 for (var i = 0; i < _splitter._chunks.length - 1; i++) { | |
363 var chunk = _splitter._chunks[i]; | |
364 if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) { | |
365 usedNestingLevels.add(chunk.nesting); | |
366 chunk.nesting.clearTotalUsedIndent(); | |
367 } | |
368 } | |
369 | |
370 for (var nesting in usedNestingLevels) { | |
371 nesting.refreshTotalUsedIndent(usedNestingLevels); | |
372 } | |
373 | |
374 _splits = new SplitSet(_splitter._chunks.length); | |
375 for (var i = 0; i < _splitter._chunks.length - 1; i++) { | |
376 var chunk = _splitter._chunks[i]; | |
377 if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) { | |
378 var indent = 0; | |
379 if (!chunk.flushLeftAfter) { | |
380 // Add in the chunk's indent. | |
381 indent = _splitter._blockIndentation + chunk.indent; | |
382 | |
383 // And any expression nesting. | |
384 indent += chunk.nesting.totalUsedIndent; | |
385 } | |
386 | |
387 _splits.add(i, indent); | |
388 } | |
389 } | |
390 } | |
391 | |
392 /// Gets the value to use for [rule], either the bound value or `0` if it | |
393 /// isn't bound. | |
394 int _getValue(Rule rule) { | |
395 if (rule is HardSplitRule) return 0; | |
396 | |
397 return _ruleValues.getValue(rule); | |
398 } | |
399 | |
400 /// Evaluates the cost (i.e. the relative "badness") of splitting the line | |
401 /// into [lines] physical lines based on the current set of rules. | |
402 void _calculateCost() { | |
403 assert(_splits != null); | |
404 | |
405 // Calculate the length of each line and apply the cost of any spans that | |
406 // get split. | |
407 var cost = 0; | |
408 _overflowChars = 0; | |
409 | |
410 var length = _splitter._firstLineIndent; | |
411 | |
412 // The unbound rules in use by the current line. This will be null after | |
413 // the first long line has completed. | |
414 var currentLineRules = []; | |
415 | |
416 endLine(int end) { | |
417 // Track lines that went over the length. It is only rules contained in | |
418 // long lines that we may want to split. | |
419 if (length > _splitter._writer.pageWidth) { | |
420 _overflowChars += length - _splitter._writer.pageWidth; | |
421 | |
422 // Only try rules that are in the first long line, since we know at | |
423 // least one of them *will* be split. | |
424 if (currentLineRules != null && currentLineRules.isNotEmpty) { | |
425 _liveRules.addAll(currentLineRules); | |
426 currentLineRules = null; | |
427 } | |
428 } else { | |
429 // The line fit, so don't keep track of its rules. | |
430 if (currentLineRules != null) { | |
431 currentLineRules.clear(); | |
432 } | |
433 } | |
434 } | |
435 | |
436 // The set of spans that contain chunks that ended up splitting. We store | |
437 // these in a set so a span's cost doesn't get double-counted if more than | |
438 // one split occurs in it. | |
439 var splitSpans = new Set(); | |
440 | |
441 for (var i = 0; i < _splitter._chunks.length; i++) { | |
442 var chunk = _splitter._chunks[i]; | |
443 | |
444 length += chunk.text.length; | |
445 | |
446 // Ignore the split after the last chunk. | |
447 if (i == _splitter._chunks.length - 1) break; | |
448 | |
449 if (_splits.shouldSplitAt(i)) { | |
450 endLine(i); | |
451 | |
452 splitSpans.addAll(chunk.spans); | |
453 | |
454 // Include the cost of the nested block. | |
455 if (chunk.blockChunks.isNotEmpty) { | |
456 cost += | |
457 _splitter._writer.formatBlock(chunk, _splits.getColumn(i)).cost; | |
458 } | |
459 | |
460 // Start the new line. | |
461 length = _splits.getColumn(i); | |
462 } else { | |
463 if (chunk.spaceWhenUnsplit) length++; | |
464 | |
465 // Include the nested block inline, if any. | |
466 length += chunk.unsplitBlockLength; | |
467 | |
468 // If we might be in the first overly long line, keep track of any | |
469 // unbound rules we encounter. These are ones that we'll want to try to | |
470 // bind to shorten the long line. | |
471 if (currentLineRules != null && | |
472 chunk.rule != null && | |
473 !chunk.isHardSplit && | |
474 !_ruleValues.contains(chunk.rule)) { | |
475 currentLineRules.add(chunk.rule); | |
476 } | |
477 } | |
478 } | |
479 | |
480 // Add the costs for the rules that split. | |
481 _ruleValues.forEach(_splitter._rules, (rule, value) { | |
482 // A rule may be bound to zero if another rule constrains it to not split. | |
483 if (value != 0) cost += rule.cost; | |
484 }); | |
485 | |
486 // Add the costs for the spans containing splits. | |
487 for (var span in splitSpans) cost += span.cost; | |
488 | |
489 // Finish the last line. | |
490 endLine(_splitter._chunks.length); | |
491 | |
492 _splits.setCost(cost); | |
493 } | |
494 | |
495 String toString() { | |
496 var buffer = new StringBuffer(); | |
497 | |
498 buffer.writeAll( | |
499 _splitter._rules.map((rule) { | |
500 var valueLength = "${rule.fullySplitValue}".length; | |
501 | |
502 var value = "?"; | |
503 if (_ruleValues.contains(rule)) { | |
504 value = "${_ruleValues.getValue(rule)}"; | |
505 } | |
506 | |
507 value = value.padLeft(valueLength); | |
508 if (_liveRules.contains(rule)) { | |
509 value = debug.bold(value); | |
510 } else { | |
511 value = debug.gray(value); | |
512 } | |
513 | |
514 return value; | |
515 }), | |
516 " "); | |
517 | |
518 buffer.write(" \$${splits.cost}"); | |
519 | |
520 if (overflowChars > 0) buffer.write(" (${overflowChars} over)"); | |
521 if (!_isComplete) buffer.write(" (incomplete)"); | |
522 if (splits == null) buffer.write(" invalid"); | |
523 | |
524 return buffer.toString(); | |
525 } | |
526 } | |
OLD | NEW |