OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library dart_style.src.line_splitting.line_splitter; |
| 6 |
| 7 import '../chunk.dart'; |
| 8 import '../debug.dart' as debug; |
| 9 import '../line_writer.dart'; |
| 10 import '../rule/rule.dart'; |
| 11 import 'rule_set.dart'; |
| 12 import 'solve_state.dart'; |
| 13 import 'solve_state_queue.dart'; |
| 14 |
| 15 /// To ensure the solver doesn't go totally pathological on giant code, we cap |
| 16 /// it at a fixed number of attempts. |
| 17 /// |
| 18 /// If the optimal solution isn't found after this many tries, it just uses the |
| 19 /// best it found so far. |
| 20 const _maxAttempts = 5000; |
| 21 |
| 22 /// Takes a set of chunks and determines the best values for its rules in order |
| 23 /// to fit it inside the page boundary. |
| 24 /// |
| 25 /// This problem is exponential in the number of rules and a single expression |
| 26 /// in Dart can be quite large, so it isn't feasible to brute force this. For |
| 27 /// example: |
| 28 /// |
| 29 /// outer( |
| 30 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
| 31 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
| 32 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
| 33 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8)); |
| 34 /// |
| 35 /// There are 509,607,936 ways this can be split. |
| 36 /// |
| 37 /// The problem is even harder because we may not be able to easily tell if a |
| 38 /// given solution is the best one. It's possible that there is *no* solution |
| 39 /// that fits in the page (due to long strings or identifiers) so the winning |
| 40 /// solution may still have overflow characters. This makes it hard to know |
| 41 /// when we are done and can stop looking. |
| 42 /// |
| 43 /// There are a couple of pieces of domain knowledge we use to cope with this: |
| 44 /// |
| 45 /// - Changing a rule from unsplit to split will never lower its cost. A |
| 46 /// solution with all rules unsplit will always be the one with the lowest |
| 47 /// cost (zero). Conversely, setting all of its rules to the maximum split |
| 48 /// value will always have the highest cost. |
| 49 /// |
| 50 /// (You might think there is a converse rule about overflow characters. The |
| 51 /// solution with the fewest splits will have the most overflow, and the |
| 52 /// solution with the most splits will have the least overflow. Alas, because |
| 53 /// of indentation, that isn't always the case. Adding a split may *increase* |
| 54 /// overflow in some cases.) |
| 55 /// |
| 56 /// - If all of the chunks for a rule are inside lines that already fit in the |
| 57 /// page, then splitting that rule will never improve the solution. |
| 58 /// |
| 59 /// - If two partial solutions have the same cost and the bound rules don't |
| 60 /// affect any of the remaining unbound rules, then whichever partial |
| 61 /// solution is currently better will always be the winner regardless of what |
| 62 /// the remaining unbound rules are bound to. |
| 63 /// |
| 64 /// We start off with a [SolveState] where all rules are unbound (which |
| 65 /// implicitly treats them as unsplit). For a given solve state, we can produce |
| 66 /// a set of expanded states that takes some of the rules in the first long |
| 67 /// line and bind them to split values. This always produces new solve states |
| 68 /// with higher cost (but often fewer overflow characters) than the parent |
| 69 /// state. |
| 70 /// |
| 71 /// We take these expanded states and add them to a work list sorted by cost. |
| 72 /// Since unsplit rules always have lower cost solutions, we know that no state |
| 73 /// we enqueue later will ever have a lower cost than the ones we already have |
| 74 /// enqueued. |
| 75 /// |
| 76 /// Then we keep pulling states off the work list and expanding them and adding |
| 77 /// the results back into the list. We do this until we hit a solution where |
| 78 /// all characters fit in the page. The first one we find will have the lowest |
| 79 /// cost and we're done. |
| 80 /// |
| 81 /// We also keep running track of the best solution we've found so far that |
| 82 /// has the fewest overflow characters and the lowest cost. If no solution fits, |
| 83 /// we'll use this one. |
| 84 /// |
| 85 /// When enqueing a solution, we can sometimes collapse it and a previously |
| 86 /// queued one by preferring one or the other. If two solutions have the same |
| 87 /// cost and we can prove that they won't diverge later as unbound rules are |
| 88 /// set, we can pick the winner now and discard the other. This lets us avoid |
| 89 /// redundantly exploring entire subtrees of the solution space. |
| 90 /// |
| 91 /// As a final escape hatch for pathologically nasty code, after trying some |
| 92 /// fixed maximum number of solve states, we just bail and return the best |
| 93 /// solution found so far. |
| 94 /// |
| 95 /// Even with the above algorithmic optimizations, complex code may still |
| 96 /// require a lot of exploring to find an optimal solution. To make that fast, |
| 97 /// this code is carefully profiled and optimized. If you modify this, make |
| 98 /// sure to test against the benchmark to ensure you don't regress performance. |
| 99 class LineSplitter { |
| 100 final LineWriter writer; |
| 101 |
| 102 /// The list of chunks being split. |
| 103 final List<Chunk> chunks; |
| 104 |
| 105 /// The set of soft rules whose values are being selected. |
| 106 final List<Rule> rules; |
| 107 |
| 108 /// The number of characters of additional indentation to apply to each line. |
| 109 /// |
| 110 /// This is used when formatting blocks to get the output into the right |
| 111 /// column based on where the block appears. |
| 112 final int blockIndentation; |
| 113 |
| 114 /// The starting column of the first line. |
| 115 final int firstLineIndent; |
| 116 |
| 117 /// The queue of solve states to explore further. |
| 118 /// |
| 119 /// This is sorted lowest-cost first. This ensures that as soon as we find a |
| 120 /// solution that fits in the page, we know it will be the lowest cost one |
| 121 /// and can stop looking. |
| 122 final _queue = new SolveStateQueue(); |
| 123 |
| 124 /// The lowest cost solution found so far. |
| 125 SolveState _bestSolution; |
| 126 |
| 127 /// Creates a new splitter for [_writer] that tries to fit [chunks] into the |
| 128 /// page width. |
| 129 LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation, |
| 130 int firstLineIndent, |
| 131 {bool flushLeft: false}) |
| 132 : chunks = chunks, |
| 133 // Collect the set of soft rules that we need to select values for. |
| 134 rules = chunks |
| 135 .map((chunk) => chunk.rule) |
| 136 .where((rule) => rule != null && rule is! HardSplitRule) |
| 137 .toSet() |
| 138 .toList(growable: false), |
| 139 blockIndentation = blockIndentation, |
| 140 firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation { |
| 141 _queue.bindSplitter(this); |
| 142 |
| 143 // Store the rule's index in the rule so we can get from a chunk to a rule |
| 144 // index quickly. |
| 145 for (var i = 0; i < rules.length; i++) { |
| 146 rules[i].index = i; |
| 147 } |
| 148 } |
| 149 |
| 150 /// Determine the best way to split the chunks into lines that fit in the |
| 151 /// page, if possible. |
| 152 /// |
| 153 /// Returns a [SplitSet] that defines where each split occurs and the |
| 154 /// indentation of each line. |
| 155 /// |
| 156 /// [firstLineIndent] is the number of characters of whitespace to prefix the |
| 157 /// first line of output with. |
| 158 SplitSet apply() { |
| 159 // Start with a completely unbound, unsplit solution. |
| 160 _queue.add(new SolveState(this, new RuleSet(rules.length))); |
| 161 |
| 162 var attempts = 0; |
| 163 while (_queue.isNotEmpty) { |
| 164 var state = _queue.removeFirst(); |
| 165 |
| 166 if (state.isBetterThan(_bestSolution)) { |
| 167 _bestSolution = state; |
| 168 |
| 169 // Since we sort solutions by cost the first solution we find that |
| 170 // fits is the winner. |
| 171 if (_bestSolution.overflowChars == 0) break; |
| 172 } |
| 173 |
| 174 if (debug.traceSplitter) { |
| 175 var best = state == _bestSolution ? " (best)" : ""; |
| 176 debug.log("$state$best"); |
| 177 debug.dumpLines(chunks, firstLineIndent, state.splits); |
| 178 debug.log(); |
| 179 } |
| 180 |
| 181 if (attempts++ > _maxAttempts) break; |
| 182 |
| 183 // Try bumping the rule values for rules whose chunks are on long lines. |
| 184 state.expand(); |
| 185 } |
| 186 |
| 187 if (debug.traceSplitter) { |
| 188 debug.log("$_bestSolution (winner)"); |
| 189 debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits); |
| 190 debug.log(); |
| 191 } |
| 192 |
| 193 return _bestSolution.splits; |
| 194 } |
| 195 |
| 196 void enqueue(SolveState state) { |
| 197 _queue.add(state); |
| 198 } |
| 199 } |
OLD | NEW |