Index: lib/src/line_splitting/line_splitter.dart |
diff --git a/lib/src/line_splitting/line_splitter.dart b/lib/src/line_splitting/line_splitter.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f562ca40dc699dc45f96e404f1b4ae7c2b66d207 |
--- /dev/null |
+++ b/lib/src/line_splitting/line_splitter.dart |
@@ -0,0 +1,199 @@ |
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library dart_style.src.line_splitting.line_splitter; |
+ |
+import '../chunk.dart'; |
+import '../debug.dart' as debug; |
+import '../line_writer.dart'; |
+import '../rule/rule.dart'; |
+import 'rule_set.dart'; |
+import 'solve_state.dart'; |
+import 'solve_state_queue.dart'; |
+ |
+/// To ensure the solver doesn't go totally pathological on giant code, we cap |
+/// it at a fixed number of attempts. |
+/// |
+/// If the optimal solution isn't found after this many tries, it just uses the |
+/// best it found so far. |
+const _maxAttempts = 5000; |
+ |
+/// Takes a set of chunks and determines the best values for its rules in order |
+/// to fit it inside the page boundary. |
+/// |
+/// This problem is exponential in the number of rules and a single expression |
+/// in Dart can be quite large, so it isn't feasible to brute force this. For |
+/// example: |
+/// |
+/// outer( |
+/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
+/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
+/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
+/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8)); |
+/// |
+/// There are 509,607,936 ways this can be split. |
+/// |
+/// The problem is even harder because we may not be able to easily tell if a |
+/// given solution is the best one. It's possible that there is *no* solution |
+/// that fits in the page (due to long strings or identifiers) so the winning |
+/// solution may still have overflow characters. This makes it hard to know |
+/// when we are done and can stop looking. |
+/// |
+/// There are a couple of pieces of domain knowledge we use to cope with this: |
+/// |
+/// - Changing a rule from unsplit to split will never lower its cost. A |
+/// solution with all rules unsplit will always be the one with the lowest |
+/// cost (zero). Conversely, setting all of its rules to the maximum split |
+/// value will always have the highest cost. |
+/// |
+/// (You might think there is a converse rule about overflow characters. The |
+/// solution with the fewest splits will have the most overflow, and the |
+/// solution with the most splits will have the least overflow. Alas, because |
+/// of indentation, that isn't always the case. Adding a split may *increase* |
+/// overflow in some cases.) |
+/// |
+/// - If all of the chunks for a rule are inside lines that already fit in the |
+/// page, then splitting that rule will never improve the solution. |
+/// |
+/// - If two partial solutions have the same cost and the bound rules don't |
+/// affect any of the remaining unbound rules, then whichever partial |
+/// solution is currently better will always be the winner regardless of what |
+/// the remaining unbound rules are bound to. |
+/// |
+/// We start off with a [SolveState] where all rules are unbound (which |
+/// implicitly treats them as unsplit). For a given solve state, we can produce |
+/// a set of expanded states that takes some of the rules in the first long |
+/// line and bind them to split values. This always produces new solve states |
+/// with higher cost (but often fewer overflow characters) than the parent |
+/// state. |
+/// |
+/// We take these expanded states and add them to a work list sorted by cost. |
+/// Since unsplit rules always have lower cost solutions, we know that no state |
+/// we enqueue later will ever have a lower cost than the ones we already have |
+/// enqueued. |
+/// |
+/// Then we keep pulling states off the work list and expanding them and adding |
+/// the results back into the list. We do this until we hit a solution where |
+/// all characters fit in the page. The first one we find will have the lowest |
+/// cost and we're done. |
+/// |
+/// We also keep running track of the best solution we've found so far that |
+/// has the fewest overflow characters and the lowest cost. If no solution fits, |
+/// we'll use this one. |
+/// |
+/// When enqueing a solution, we can sometimes collapse it and a previously |
+/// queued one by preferring one or the other. If two solutions have the same |
+/// cost and we can prove that they won't diverge later as unbound rules are |
+/// set, we can pick the winner now and discard the other. This lets us avoid |
+/// redundantly exploring entire subtrees of the solution space. |
+/// |
+/// As a final escape hatch for pathologically nasty code, after trying some |
+/// fixed maximum number of solve states, we just bail and return the best |
+/// solution found so far. |
+/// |
+/// Even with the above algorithmic optimizations, complex code may still |
+/// require a lot of exploring to find an optimal solution. To make that fast, |
+/// this code is carefully profiled and optimized. If you modify this, make |
+/// sure to test against the benchmark to ensure you don't regress performance. |
+class LineSplitter { |
+ final LineWriter writer; |
+ |
+ /// The list of chunks being split. |
+ final List<Chunk> chunks; |
+ |
+ /// The set of soft rules whose values are being selected. |
+ final List<Rule> rules; |
+ |
+ /// The number of characters of additional indentation to apply to each line. |
+ /// |
+ /// This is used when formatting blocks to get the output into the right |
+ /// column based on where the block appears. |
+ final int blockIndentation; |
+ |
+ /// The starting column of the first line. |
+ final int firstLineIndent; |
+ |
+ /// The queue of solve states to explore further. |
+ /// |
+ /// This is sorted lowest-cost first. This ensures that as soon as we find a |
+ /// solution that fits in the page, we know it will be the lowest cost one |
+ /// and can stop looking. |
+ final _queue = new SolveStateQueue(); |
+ |
+ /// The lowest cost solution found so far. |
+ SolveState _bestSolution; |
+ |
+ /// Creates a new splitter for [_writer] that tries to fit [chunks] into the |
+ /// page width. |
+ LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation, |
+ int firstLineIndent, |
+ {bool flushLeft: false}) |
+ : chunks = chunks, |
+ // Collect the set of soft rules that we need to select values for. |
+ rules = chunks |
+ .map((chunk) => chunk.rule) |
+ .where((rule) => rule != null && rule is! HardSplitRule) |
+ .toSet() |
+ .toList(growable: false), |
+ blockIndentation = blockIndentation, |
+ firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation { |
+ _queue.bindSplitter(this); |
+ |
+ // Store the rule's index in the rule so we can get from a chunk to a rule |
+ // index quickly. |
+ for (var i = 0; i < rules.length; i++) { |
+ rules[i].index = i; |
+ } |
+ } |
+ |
+ /// Determine the best way to split the chunks into lines that fit in the |
+ /// page, if possible. |
+ /// |
+ /// Returns a [SplitSet] that defines where each split occurs and the |
+ /// indentation of each line. |
+ /// |
+ /// [firstLineIndent] is the number of characters of whitespace to prefix the |
+ /// first line of output with. |
+ SplitSet apply() { |
+ // Start with a completely unbound, unsplit solution. |
+ _queue.add(new SolveState(this, new RuleSet(rules.length))); |
+ |
+ var attempts = 0; |
+ while (_queue.isNotEmpty) { |
+ var state = _queue.removeFirst(); |
+ |
+ if (state.isBetterThan(_bestSolution)) { |
+ _bestSolution = state; |
+ |
+ // Since we sort solutions by cost the first solution we find that |
+ // fits is the winner. |
+ if (_bestSolution.overflowChars == 0) break; |
+ } |
+ |
+ if (debug.traceSplitter) { |
+ var best = state == _bestSolution ? " (best)" : ""; |
+ debug.log("$state$best"); |
+ debug.dumpLines(chunks, firstLineIndent, state.splits); |
+ debug.log(); |
+ } |
+ |
+ if (attempts++ > _maxAttempts) break; |
+ |
+ // Try bumping the rule values for rules whose chunks are on long lines. |
+ state.expand(); |
+ } |
+ |
+ if (debug.traceSplitter) { |
+ debug.log("$_bestSolution (winner)"); |
+ debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits); |
+ debug.log(); |
+ } |
+ |
+ return _bestSolution.splits; |
+ } |
+ |
+ void enqueue(SolveState state) { |
+ _queue.add(state); |
+ } |
+} |