Index: lib/src/line_splitting/solve_state.dart |
diff --git a/lib/src/line_splitter.dart b/lib/src/line_splitting/solve_state.dart |
similarity index 52% |
rename from lib/src/line_splitter.dart |
rename to lib/src/line_splitting/solve_state.dart |
index f89d4f430e3cd197901775332035517a77b24c0b..04a9b1142caa0dd8aee8bef4d5f6032c7b20459f 100644 |
--- a/lib/src/line_splitter.dart |
+++ b/lib/src/line_splitting/solve_state.dart |
@@ -2,185 +2,13 @@ |
// for details. All rights reserved. Use of this source code is governed by a |
// BSD-style license that can be found in the LICENSE file. |
-library dart_style.src.line_splitter; |
+library dart_style.src.line_splitting.solve_state; |
-import 'package:collection/priority_queue.dart'; |
- |
-import 'chunk.dart'; |
-import 'debug.dart' as debug; |
-import 'line_writer.dart'; |
-import 'rule/rule.dart'; |
+import '../debug.dart' as debug; |
+import '../rule/rule.dart'; |
+import 'line_splitter.dart'; |
import 'rule_set.dart'; |
-/// To ensure the solver doesn't go totally pathological on giant code, we cap |
-/// it at a fixed number of attempts. |
-/// |
-/// If the optimal solution isn't found after this many tries, it just uses the |
-/// best it found so far. |
-const _maxAttempts = 50000; |
- |
-/// Takes a set of chunks and determines the best values for its rules in order |
-/// to fit it inside the page boundary. |
-/// |
-/// This problem is exponential in the number of rules and a single expression |
-/// in Dart can be quite large, so it isn't feasible to brute force this. For |
-/// example: |
-/// |
-/// outer( |
-/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
-/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
-/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), |
-/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8)); |
-/// |
-/// There are 509,607,936 ways this can be split. |
-/// |
-/// The problem is even harder because we may not be able to easily tell if a |
-/// given solution is the best one. It's possible that there is *no* solution |
-/// that fits in the page (due to long strings or identifiers) so the winning |
-/// solution may still have overflow characters. This makes it hard to know |
-/// when we are done and can stop looking. |
-/// |
-/// There are a couple of pieces of domain knowledge we use to cope with this: |
-/// |
-/// - Changing a rule from unsplit to split will never lower its cost. A |
-/// solution with all rules unsplit will always be the one with the lowest |
-/// cost (zero). Conversely, setting all of its rules to the maximum split |
-/// value will always have the highest cost. |
-/// |
-/// (You might think there is a converse rule about overflow characters. The |
-/// solution with the fewest splits will have the most overflow, and the |
-/// solution with the most splits will have the least overflow. Alas, because |
-/// of indentation, that isn't always the case. Adding a split may *increase* |
-/// overflow in some cases.) |
-/// |
-/// - If all of the chunks for a rule are inside lines that already fit in the |
-/// page, then splitting that rule will never improve the solution. |
-/// |
-/// We start off with a [SolveState] where all rules are unbound (which |
-/// implicitly treats them as unsplit). For a given solve state, we can produce |
-/// a set of expanded states that takes some of the rules in the first long |
-/// line and bind them to split values. This always produces new solve states |
-/// with higher cost (but often fewer overflow characters) than the parent |
-/// state. |
-/// |
-/// We take these expanded states and add them to a work list sorted by cost. |
-/// Since unsplit rules always have lower cost solutions, we know that no state |
-/// we enqueue later will ever have a lower cost than the ones we already have |
-/// enqueued. |
-/// |
-/// Then we keep pulling states off the work list and expanding them and adding |
-/// the results back into the list. We do this until we hit a solution where |
-/// all characters fit in the page. The first one we find will have the lowest |
-/// cost and we're done. |
-/// |
-/// We also keep running track of the best solution we've found so far that |
-/// has the fewest overflow characters and the lowest cost. If no solution fits, |
-/// we'll use this one. |
-/// |
-/// As a final escape hatch for pathologically nasty code, after trying some |
-/// fixed maximum number of solve states, we just bail and return the best |
-/// solution found so far. |
-/// |
-/// Even with the above algorithmic optimizations, complex code may still |
-/// require a lot of exploring to find an optimal solution. To make that fast, |
-/// this code is carefully profiled and optimized. If you modify this, make |
-/// sure to test against the benchmark to ensure you don't regress performance. |
-class LineSplitter { |
- final LineWriter _writer; |
- |
- /// The list of chunks being split. |
- final List<Chunk> _chunks; |
- |
- /// The set of soft rules whose values are being selected. |
- final List<Rule> _rules; |
- |
- /// The number of characters of additional indentation to apply to each line. |
- /// |
- /// This is used when formatting blocks to get the output into the right |
- /// column based on where the block appears. |
- final int _blockIndentation; |
- |
- /// The starting column of the first line. |
- final int _firstLineIndent; |
- |
- /// The list of solve states to explore further. |
- /// |
- /// This is sorted lowest-cost first. This ensures that as soon as we find a |
- /// solution that fits in the page, we know it will be the lowest cost one |
- /// and can stop looking. |
- final _workList = new HeapPriorityQueue<SolveState>(); |
- |
- /// The lowest cost solution found so far. |
- SolveState _bestSolution; |
- |
- /// Creates a new splitter for [_writer] that tries to fit [chunks] into the |
- /// page width. |
- LineSplitter(this._writer, List<Chunk> chunks, int blockIndentation, |
- int firstLineIndent, |
- {bool flushLeft: false}) |
- : _chunks = chunks, |
- // Collect the set of soft rules that we need to select values for. |
- _rules = chunks |
- .map((chunk) => chunk.rule) |
- .where((rule) => rule != null && rule is! HardSplitRule) |
- .toSet() |
- .toList(growable: false), |
- _blockIndentation = blockIndentation, |
- _firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation { |
- // Store the rule's index in the rule so we can get from a chunk to a rule |
- // index quickly. |
- for (var i = 0; i < _rules.length; i++) { |
- _rules[i].index = i; |
- } |
- } |
- |
- /// Determine the best way to split the chunks into lines that fit in the |
- /// page, if possible. |
- /// |
- /// Returns a [SplitSet] that defines where each split occurs and the |
- /// indentation of each line. |
- /// |
- /// [firstLineIndent] is the number of characters of whitespace to prefix the |
- /// first line of output with. |
- SplitSet apply() { |
- // Start with a completely unbound, unsplit solution. |
- _workList.add(new SolveState(this, new RuleSet(_rules.length))); |
- |
- var attempts = 0; |
- while (!_workList.isEmpty) { |
- var state = _workList.removeFirst(); |
- |
- if (state.isBetterThan(_bestSolution)) { |
- _bestSolution = state; |
- |
- // Since we sort solutions by cost the first solution we find that |
- // fits is the winner. |
- if (_bestSolution.overflowChars == 0) break; |
- } |
- |
- if (debug.traceSplitter) { |
- var best = state == _bestSolution ? " (best)" : ""; |
- debug.log("$state$best"); |
- debug.dumpLines(_chunks, _firstLineIndent, state.splits); |
- debug.log(); |
- } |
- |
- if (attempts++ > _maxAttempts) break; |
- |
- // Try bumping the rule values for rules whose chunks are on long lines. |
- state.expand(); |
- } |
- |
- if (debug.traceSplitter) { |
- debug.log("$_bestSolution (winner)"); |
- debug.dumpLines(_chunks, _firstLineIndent, _bestSolution.splits); |
- debug.log(); |
- } |
- |
- return _bestSolution.splits; |
- } |
-} |
- |
/// A possibly incomplete solution in the line splitting search space. |
/// |
/// A single [SolveState] binds some subset of the rules to values while |
@@ -193,7 +21,7 @@ class LineSplitter { |
/// From a given solve state, we can explore the search tree to more refined |
/// solve states by producing new ones that add more bound rules to the current |
/// state. |
-class SolveState implements Comparable<SolveState> { |
+class SolveState { |
final LineSplitter _splitter; |
final RuleSet _ruleValues; |
@@ -243,43 +71,28 @@ class SolveState implements Comparable<SolveState> { |
/// aren't allowed to be. |
bool _isComplete = true; |
+ /// The constraints the bound rules in this state have on the remaining |
+ /// unbound rules. |
+ Map<Rule, int> _constraints; |
+ |
+ /// The bound rules that appear inside lines also containing unbound rules. |
+ /// |
+ /// By appearing in the same line, it means these bound rules may affect the |
+ /// results of binding those unbound rules. This is used to tell if two |
+ /// states may diverge by binding unbound rules or not. |
+ Set<Rule> _boundRulesInUnboundLines; |
+ |
SolveState(this._splitter, this._ruleValues) { |
_calculateSplits(); |
_calculateCost(); |
} |
- /// Orders this state relative to [other]. |
- /// |
- /// This is the best-first ordering that the [LineSplitter] uses in its |
- /// worklist. It prefers cheaper states even if they overflow because this |
- /// ensures it finds the best solution first as soon as it finds one that |
- /// fits in the page so it can early out. |
- int compareTo(SolveState other) { |
- // TODO(rnystrom): It may be worth sorting by the estimated lowest number |
- // of overflow characters first. That doesn't help in cases where there is |
- // a solution that fits, but may help in corner cases where there is no |
- // fitting solution. |
- |
- if (splits.cost != other.splits.cost) { |
- return splits.cost.compareTo(other.splits.cost); |
- } |
- |
- if (overflowChars != other.overflowChars) { |
- return overflowChars.compareTo(other.overflowChars); |
- } |
- |
- // Distinguish states based on the rule values just so that states with the |
- // same cost range but different rule values don't get considered identical |
- // by HeapPriorityQueue. |
- for (var rule in _splitter._rules) { |
- var value = _ruleValues.getValue(rule); |
- var otherValue = other._ruleValues.getValue(rule); |
- |
- if (value != otherValue) return value.compareTo(otherValue); |
- } |
+ /// Gets the value to use for [rule], either the bound value or `0` if it |
+ /// isn't bound. |
+ int getValue(Rule rule) { |
+ if (rule is HardSplitRule) return 0; |
- // If we get here, this state is identical to [other]. |
- return 0; |
+ return _ruleValues.getValue(rule); |
} |
/// Returns `true` if this state is a better solution to use as the final |
@@ -301,6 +114,50 @@ class SolveState implements Comparable<SolveState> { |
return splits.cost < other.splits.cost; |
} |
+ /// Determines if this state "overlaps" [other]. |
+ /// |
+ /// Two states overlap if they currently have the same score and we can tell |
+ /// for certain that they won't diverge as their unbound rules are bound. If |
+ /// that's the case, then whichever state is better now (based on their |
+ /// currently bound rule values) is the one that will always win, regardless |
+ /// of how they get expanded. |
+ /// |
+ /// In other words, their entire expanded solution trees also overlap. In |
+ /// that case, there's no point in expanding both, so we can just pick the |
+ /// winner now and discard the other. |
+ /// |
+ /// For this to be true, we need to prove that binding an unbound rule won't |
+ /// affect one state differently from the other. We have to show that they |
+ /// are parallel. |
+ /// |
+ /// Two things could cause this *not* to be the case. |
+ /// |
+ /// 1. If one state's bound rules place different constraints on the unbound |
+ /// rules than the other. |
+ /// |
+ /// 2. If one state's different bound rules are in the same line as an |
+ /// unbound rule. That affects the indentation and length of the line, |
+ /// which affects the context where the unbound rule is being chosen. |
+ /// |
+ /// If neither of these is the case, the states overlap. Returns `<0` if this |
+ /// state is better, or `>0` if [other] wins. If the states do not overlap, |
+ /// returns `0`. |
+ int compareOverlap(SolveState other) { |
+ if (!_isOverlapping(other)) return 0; |
+ |
+ // They do overlap, so see which one wins. |
+ for (var rule in _splitter.rules) { |
+ var value = _ruleValues.getValue(rule); |
+ var otherValue = other._ruleValues.getValue(rule); |
+ |
+ if (value != otherValue) return value.compareTo(otherValue); |
+ } |
+ |
+ // The way SolveStates are expanded should guarantee that we never generate |
+ // the exact same state twice. Getting here implies that that failed. |
+ throw "unreachable"; |
+ } |
+ |
/// Enqueues more solve states to consider based on this one. |
/// |
/// For each unbound rule in this state that occurred in the first long line, |
@@ -312,14 +169,14 @@ class SolveState implements Comparable<SolveState> { |
// Walk down the rules looking for unbound ones to try. |
var triedRules = 0; |
- for (var rule in _splitter._rules) { |
+ for (var rule in _splitter.rules) { |
if (_liveRules.contains(rule)) { |
// We found one worth trying, so try all of its values. |
for (var value = 1; value < rule.numValues; value++) { |
var boundRules = unsplitRules.clone(); |
var mustSplitRules; |
- var valid = boundRules.tryBind(_splitter._rules, rule, value, (rule) { |
+ var valid = boundRules.tryBind(_splitter.rules, rule, value, (rule) { |
if (mustSplitRules == null) mustSplitRules = []; |
mustSplitRules.add(rule); |
}); |
@@ -335,7 +192,7 @@ class SolveState implements Comparable<SolveState> { |
state._liveRules.addAll(mustSplitRules); |
} |
- _splitter._workList.add(state); |
+ _splitter.enqueue(state); |
} |
// Stop once we've tried all of the ones we can. |
@@ -346,22 +203,50 @@ class SolveState implements Comparable<SolveState> { |
if (!_ruleValues.contains(rule)) { |
// Pass a dummy callback because zero will never fail. (If it would |
// have, that rule would already be bound to some other value.) |
- if (!unsplitRules.tryBind(_splitter._rules, rule, 0, (_) {})) { |
+ if (!unsplitRules.tryBind(_splitter.rules, rule, 0, (_) {})) { |
break; |
} |
} |
} |
} |
+ /// Returns `true` if [other] overlaps this state. |
+ bool _isOverlapping(SolveState other) { |
+ _ensureOverlapFields(); |
+ other._ensureOverlapFields(); |
+ |
+ // Lines that contain both bound and unbound rules must have the same |
+ // bound values. |
+ if (_boundRulesInUnboundLines.length != |
+ other._boundRulesInUnboundLines.length) { |
+ return false; |
+ } |
+ |
+ for (var rule in _boundRulesInUnboundLines) { |
+ if (!other._boundRulesInUnboundLines.contains(rule)) return false; |
+ if (_ruleValues.getValue(rule) != other._ruleValues.getValue(rule)) { |
+ return false; |
+ } |
+ } |
+ |
+ if (_constraints.length != other._constraints.length) return false; |
+ |
+ for (var rule in _constraints.keys) { |
+ if (_constraints[rule] != other._constraints[rule]) return false; |
+ } |
+ |
+ return true; |
+ } |
+ |
/// Calculates the [SplitSet] for this solve state, assuming any unbound |
/// rules are set to zero. |
void _calculateSplits() { |
// Figure out which expression nesting levels got split and need to be |
// assigned columns. |
var usedNestingLevels = new Set(); |
- for (var i = 0; i < _splitter._chunks.length - 1; i++) { |
- var chunk = _splitter._chunks[i]; |
- if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) { |
+ for (var i = 0; i < _splitter.chunks.length - 1; i++) { |
+ var chunk = _splitter.chunks[i]; |
+ if (chunk.rule.isSplit(getValue(chunk.rule), chunk)) { |
usedNestingLevels.add(chunk.nesting); |
chunk.nesting.clearTotalUsedIndent(); |
} |
@@ -371,14 +256,14 @@ class SolveState implements Comparable<SolveState> { |
nesting.refreshTotalUsedIndent(usedNestingLevels); |
} |
- _splits = new SplitSet(_splitter._chunks.length); |
- for (var i = 0; i < _splitter._chunks.length - 1; i++) { |
- var chunk = _splitter._chunks[i]; |
- if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) { |
+ _splits = new SplitSet(_splitter.chunks.length); |
+ for (var i = 0; i < _splitter.chunks.length - 1; i++) { |
+ var chunk = _splitter.chunks[i]; |
+ if (chunk.rule.isSplit(getValue(chunk.rule), chunk)) { |
var indent = 0; |
if (!chunk.flushLeftAfter) { |
// Add in the chunk's indent. |
- indent = _splitter._blockIndentation + chunk.indent; |
+ indent = _splitter.blockIndentation + chunk.indent; |
// And any expression nesting. |
indent += chunk.nesting.totalUsedIndent; |
@@ -389,14 +274,6 @@ class SolveState implements Comparable<SolveState> { |
} |
} |
- /// Gets the value to use for [rule], either the bound value or `0` if it |
- /// isn't bound. |
- int _getValue(Rule rule) { |
- if (rule is HardSplitRule) return 0; |
- |
- return _ruleValues.getValue(rule); |
- } |
- |
/// Evaluates the cost (i.e. the relative "badness") of splitting the line |
/// into [lines] physical lines based on the current set of rules. |
void _calculateCost() { |
@@ -407,7 +284,7 @@ class SolveState implements Comparable<SolveState> { |
var cost = 0; |
_overflowChars = 0; |
- var length = _splitter._firstLineIndent; |
+ var length = _splitter.firstLineIndent; |
// The unbound rules in use by the current line. This will be null after |
// the first long line has completed. |
@@ -416,8 +293,8 @@ class SolveState implements Comparable<SolveState> { |
endLine(int end) { |
// Track lines that went over the length. It is only rules contained in |
// long lines that we may want to split. |
- if (length > _splitter._writer.pageWidth) { |
- _overflowChars += length - _splitter._writer.pageWidth; |
+ if (length > _splitter.writer.pageWidth) { |
+ _overflowChars += length - _splitter.writer.pageWidth; |
// Only try rules that are in the first long line, since we know at |
// least one of them *will* be split. |
@@ -438,13 +315,13 @@ class SolveState implements Comparable<SolveState> { |
// one split occurs in it. |
var splitSpans = new Set(); |
- for (var i = 0; i < _splitter._chunks.length; i++) { |
- var chunk = _splitter._chunks[i]; |
+ for (var i = 0; i < _splitter.chunks.length; i++) { |
+ var chunk = _splitter.chunks[i]; |
length += chunk.text.length; |
// Ignore the split after the last chunk. |
- if (i == _splitter._chunks.length - 1) break; |
+ if (i == _splitter.chunks.length - 1) break; |
if (_splits.shouldSplitAt(i)) { |
endLine(i); |
@@ -454,7 +331,7 @@ class SolveState implements Comparable<SolveState> { |
// Include the cost of the nested block. |
if (chunk.blockChunks.isNotEmpty) { |
cost += |
- _splitter._writer.formatBlock(chunk, _splits.getColumn(i)).cost; |
+ _splitter.writer.formatBlock(chunk, _splits.getColumn(i)).cost; |
} |
// Start the new line. |
@@ -478,7 +355,7 @@ class SolveState implements Comparable<SolveState> { |
} |
// Add the costs for the rules that split. |
- _ruleValues.forEach(_splitter._rules, (rule, value) { |
+ _ruleValues.forEach(_splitter.rules, (rule, value) { |
// A rule may be bound to zero if another rule constrains it to not split. |
if (value != 0) cost += rule.cost; |
}); |
@@ -487,16 +364,83 @@ class SolveState implements Comparable<SolveState> { |
for (var span in splitSpans) cost += span.cost; |
// Finish the last line. |
- endLine(_splitter._chunks.length); |
+ endLine(_splitter.chunks.length); |
_splits.setCost(cost); |
} |
+ /// Lazily initializes the fields needed to compare two states for overlap. |
+ /// |
+ /// We do this lazily because the calculation is a bit slow, and is only |
+ /// needed when we have two states with the same score. |
+ void _ensureOverlapFields() { |
+ if (_constraints != null) return; |
+ |
+ _calculateConstraints(); |
+ _calculateBoundRulesInUnboundLines(); |
+ } |
+ |
+ /// Initializes [_constraints] with any constraints the bound rules place on |
+ /// the unbound rules. |
+ void _calculateConstraints() { |
+ _constraints = {}; |
+ |
+ var unboundRules = []; |
+ var boundRules = []; |
+ |
+ for (var rule in _splitter.rules) { |
+ if (_ruleValues.contains(rule)) { |
+ boundRules.add(rule); |
+ } else { |
+ unboundRules.add(rule); |
+ } |
+ } |
+ |
+ for (var bound in boundRules) { |
+ var value = _ruleValues.getValue(bound); |
+ |
+ for (var unbound in unboundRules) { |
+ var constraint = bound.constrain(value, unbound); |
+ if (constraint != null) { |
+ _constraints[unbound] = constraint; |
+ } |
+ } |
+ } |
+ } |
+ |
+ void _calculateBoundRulesInUnboundLines() { |
+ _boundRulesInUnboundLines = new Set(); |
+ |
+ var boundInLine = new Set(); |
+ var hasUnbound = false; |
+ |
+ for (var i = 0; i < _splitter.chunks.length - 1; i++) { |
+ |
+ if (splits.shouldSplitAt(i)) { |
+ if (hasUnbound) _boundRulesInUnboundLines.addAll(boundInLine); |
+ |
+ boundInLine.clear(); |
+ hasUnbound = false; |
+ } |
+ |
+ var rule = _splitter.chunks[i].rule; |
+ if (rule != null && rule is! HardSplitRule) { |
+ if (_ruleValues.contains(rule)) { |
+ boundInLine.add(rule); |
+ } else { |
+ hasUnbound = true; |
+ } |
+ } |
+ } |
+ |
+ if (hasUnbound) _boundRulesInUnboundLines.addAll(boundInLine); |
+ } |
+ |
String toString() { |
var buffer = new StringBuffer(); |
buffer.writeAll( |
- _splitter._rules.map((rule) { |
+ _splitter.rules.map((rule) { |
var valueLength = "${rule.fullySplitValue}".length; |
var value = "?"; |