| Index: lib/src/line_splitter.dart
|
| diff --git a/lib/src/line_splitter.dart b/lib/src/line_splitter.dart
|
| index 5cfcecf0f41bea5f26966bdd7aa422537fade021..f89d4f430e3cd197901775332035517a77b24c0b 100644
|
| --- a/lib/src/line_splitter.dart
|
| +++ b/lib/src/line_splitter.dart
|
| @@ -1,349 +1,435 @@
|
| -// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
| +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
|
| // for details. All rights reserved. Use of this source code is governed by a
|
| // BSD-style license that can be found in the LICENSE file.
|
|
|
| library dart_style.src.line_splitter;
|
|
|
| -import 'dart:math' as math;
|
| +import 'package:collection/priority_queue.dart';
|
|
|
| import 'chunk.dart';
|
| import 'debug.dart' as debug;
|
| -import 'line_prefix.dart';
|
| import 'line_writer.dart';
|
| import 'rule/rule.dart';
|
| +import 'rule_set.dart';
|
|
|
| -/// Takes a series of [Chunk]s and determines the best way to split them into
|
| -/// lines of output that fit within the page width (if possible).
|
| +/// To ensure the solver doesn't go totally pathological on giant code, we cap
|
| +/// it at a fixed number of attempts.
|
| ///
|
| -/// Imagine a naïve solution. You start at the first chunk in the list and work
|
| -/// your way forward. For each chunk, you try every possible value of the rule
|
| -/// that the chunk uses. Then, you ask the rule if that chunk should split or
|
| -/// not when the rule has that value. If it does, then you try every possible
|
| -/// way you could modify the expression nesting context at that point.
|
| -///
|
| -/// For each of those possible results, you start with that as a partial
|
| -/// solution. You move onto the next chunk and repeat the process, building on
|
| -/// to that partial solution. When you reach the last chunk, you determine the
|
| -/// cost of that set of rule values and keep track of the best one.
|
| -///
|
| -/// Once you've tried every single possible permutation of rule values and
|
| -/// nesting stacks, you will have exhaustively covered every possible way the
|
| -/// line can be split and you'll know which one had the lowest cost. That's
|
| -/// your result.
|
| -///
|
| -/// The obvious problem is that this is exponential in the number of values for
|
| -/// each rule and the expression nesting levels, both of which can be quite
|
| -/// large with things like long method chains containing function literals,
|
| -/// large collection literals, etc. To tame that, this uses dynamic programming.
|
| -///
|
| -/// As the naïve solver is working its way down the chunks, it's building up a
|
| -/// partial solution. This solution has:
|
| +/// If the optimal solution isn't found after this many tries, it just uses the
|
| +/// best it found so far.
|
| +const _maxAttempts = 50000;
|
| +
|
| +/// Takes a set of chunks and determines the best values for its rules in order
|
| +/// to fit it inside the page boundary.
|
| ///
|
| -/// * A length—the number of chunks in it.
|
| -/// * The set of rule values we have selected for the rules used by those
|
| -/// chunks.
|
| -/// * The expression nesting stack that is currently active at the point where
|
| -/// the current chunk splits.
|
| +/// This problem is exponential in the number of rules, and a single expression
|
| +/// in Dart can be quite large, so it isn't feasible to brute-force this. For
|
| +/// example:
|
| ///
|
| -/// We'll call this partial solution a [LinePrefix]. The naïve solution starts
|
| -/// with an empty prefix (zero length, no rule values, no expression nesting).
|
| -/// It grabs the next chunk after that prefix and says, OK, given this prefix
|
| -/// what are all of the possible ways we can split that chunk (rule values and
|
| -/// nesting stacks). Create a new, one-chunk-longer prefix for each of those
|
| -/// and then recursively solve each of their suffixes.
|
| +/// outer(
|
| +/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
| +/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
| +/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
|
| +/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
|
| ///
|
| -/// Normally, this would basically be a depth-first traversal of the entire
|
| -/// possible solution space. However, there is a simple optimization we can
|
| -/// make. When brute forcing the solution tree, we will often solve many
|
| -/// prefixes with the same length.
|
| +/// There are 509,607,936 ways this can be split.
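|
| +///
|
| +/// (That figure is consistent with, for example, the sixteen `+` operators
|
| +/// each having two choices and each of the five argument lists having six,
|
| +/// since 2^16 * 6^5 = 509,607,936, though the exact per-rule value counts are
|
| +/// an internal detail.)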
|
| ///
|
| -/// For example, if the first chunk's rule can take three different values, we
|
| -/// will solve three line prefixes of length one, one for each rule value.
|
| -/// Those in turn may have lots of permutations leading to even more solutions
|
| -/// all with line prefixes of length two, and so on.
|
| +/// The problem is even harder because we may not be able to easily tell if a
|
| +/// given solution is the best one. It's possible that there is *no* solution
|
| +/// that fits in the page (due to long strings or identifiers), so the winning
|
| +/// solution may still have overflow characters. This makes it hard to know
|
| +/// when we are done and can stop looking.
|
| ///
|
| -/// In many cases, we have to treat these line prefixes separately. If two
|
| -/// prefixes have the same length, but fix a different value for some rule, they
|
| -/// may lead to different ways of splitting the suffix since that rule may
|
| -/// appear in a later chunk too.
|
| +/// There are a couple of pieces of domain knowledge we use to cope with this:
|
| ///
|
| -/// But in many cases, the line prefixes are equivalent. For example, consider:
|
| +/// -   Changing a rule from unsplit to split will never lower the solution's
|
| +///    cost. A solution with all rules unsplit will always be the one with the
|
| +///    lowest cost (zero). Conversely, a solution with all of its rules set to
|
| +///    the maximum split value will always have the highest cost.
|
| ///
|
| -/// function(
|
| -/// first(arg, arg, arg, arg, arg, arg),
|
| -/// second,
|
| -/// third(arg, arg, arg, arg, arg, arg));
|
| +///    (You might think there is a converse rule about overflow characters:
|
| +///    that the solution with the fewest splits has the most overflow and the
|
| +///    one with the most splits has the least. Alas, because of indentation,
|
| +///    that isn't always the case. Adding a split may *increase* overflow in
|
| +///    some cases.)
|
| ///
|
| -/// The line splitter will try a number of different ways to split the argument
|
| -/// list to `first`. For each of those, it then has to try all of the different
|
| -/// ways it could split `third`. *However*, those two are unrelated. The way
|
| -/// you split `first` has no impact on how you split `third`.
|
| +/// - If all of the chunks for a rule are inside lines that already fit in the
|
| +/// page, then splitting that rule will never improve the solution.
|
| ///
|
| -/// The precise way to state that is that as the line prefix grows *past*
|
| -/// `first()` and its argument list, it contains *less* state. It discards the
|
| -/// rule value it selected for the argument list rule since that rule doesn't
|
| -/// appear anywhere in the suffix and can't affect it. Any expression nesting
|
| -/// inside the argument list has been popped off by the time we get to `second`.
|
| +/// We start off with a [SolveState] where all rules are unbound (which
|
| +/// implicitly treats them as unsplit). For a given solve state, we can produce
|
| +/// a set of expanded states that takes some of the rules in the first long
|
| +/// line and binds them to split values. This always produces new solve states
|
| +/// with higher cost (but often fewer overflow characters) than the parent
|
| +/// state.
|
| ///
|
| -/// We'll try every possible way to split `first()`'s argument list and then
|
| -/// recursively solve the remainder of the line for each of those possible
|
| -/// splits. But each of those remainders will end up having identical prefixes.
|
| -/// For any given [LinePrefix], there is a single best solution for the suffix.
|
| -/// So, once we've found it, we can just reuse it the next time that same
|
| -/// prefix comes up.
|
| +/// We take these expanded states and add them to a work list sorted by cost.
|
| +/// Since leaving a rule unsplit is always cheaper than splitting it, expanding
|
| +/// a state never lowers its cost, so the state we pull off the front of the
|
| +/// list can never be beaten by anything we enqueue after it.
|
| ///
|
| -/// In other words, we add a memoization table, [_bestSplits]. It is a map from
|
| -/// [LinePrefix]es to [SplitSet]s. For each prefix, it stores the best set of
|
| -/// splits to use for the following suffix. With this, instead of exploring the
|
| -/// entire solution *tree*, which would be exponential, we just traverse the
|
| -/// solution *graph* where entire branches of the tree that are identical
|
| -/// collapse back together.
|
| +/// Then we keep pulling states off the work list, expanding them, and adding
|
| +/// the results back into the list. We do this until we hit a solution where
|
| +/// all characters fit in the page. The first one we find will have the lowest
|
| +/// cost and we're done.
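|
| +///
|
| +/// For example, if the cheapest state in the list costs 3 and still overflows,
|
| +/// everything it expands into costs at least 3, so the first fitting state we
|
| +/// later pull off the list is guaranteed to be the cheapest one that fits.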
|
| ///
|
| -/// The trick to making this work is storing the minimal amount of data in the
|
| -/// line prefix to *uniquely* identify a suffix while still collapsing as many
|
| -/// branches as possible. In practice, this means aggressively forgetting state
|
| -/// when the prefix grows. In particular, we do not need to keep track of
|
| -/// rule values we selected in the prefix if that rule does not affect anything
|
| -/// in the suffix.
|
| +/// We also keep track of the best solution found so far: the one with the
|
| +/// fewest overflow characters and, of those, the lowest cost. If no solution
|
| +/// fits in the page, we fall back to this one.
|
| ///
|
| -/// There are a couple of other smaller scale optimizations too. If we ever hit
|
| -/// a solution whose cost is zero, we know we won't do better, so we stop
|
| -/// searching. If we make it to the end of the chunk list and we haven't gone
|
| -/// past the end of the line, we know we don't need to split anywhere.
|
| +/// As a final escape hatch for pathologically nasty code, after trying some
|
| +/// fixed maximum number of solve states, we just bail and return the best
|
| +/// solution found so far.
|
| ///
|
| -/// But the memoization is the main thing that makes this tractable.
|
| +/// Even with the above algorithmic optimizations, complex code may still
|
| +/// require a lot of exploring to find an optimal solution. To make that fast,
|
| +/// this code is carefully profiled and optimized. If you modify this, make
|
| +/// sure to test against the benchmark to ensure you don't regress performance.
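|
| +///
|
| +/// A rough usage sketch (a hypothetical call site: `writer` is a [LineWriter],
|
| +/// `chunks` is a list of [Chunk]s, and the indentation values are arbitrary):
|
| +///
|
| +///     var splitter = new LineSplitter(writer, chunks, 0, 4);
|
| +///     var splits = splitter.apply();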
|
| class LineSplitter {
|
| final LineWriter _writer;
|
|
|
| /// The list of chunks being split.
|
| final List<Chunk> _chunks;
|
|
|
| + /// The set of soft rules whose values are being selected.
|
| + final List<Rule> _rules;
|
| +
|
| /// The number of characters of additional indentation to apply to each line.
|
| ///
|
| /// This is used when formatting blocks to get the output into the right
|
| /// column based on where the block appears.
|
| final int _blockIndentation;
|
|
|
| - /// Memoization table for the best set of splits for the remainder of the
|
| - /// line following a given prefix.
|
| - final _bestSplits = <LinePrefix, SplitSet>{};
|
| + /// The starting column of the first line.
|
| + final int _firstLineIndent;
|
|
|
| - /// The rules that appear in the first `n` chunks of the line where `n` is
|
| - /// the index into the list.
|
| - final _prefixRules = <Set<Rule>>[];
|
| + /// The list of solve states to explore further.
|
| + ///
|
| + /// This is sorted lowest-cost first. This ensures that as soon as we find a
|
| + /// solution that fits in the page, we know it will be the lowest cost one
|
| + /// and can stop looking.
|
| + final _workList = new HeapPriorityQueue<SolveState>();
|
|
|
| - /// The rules that appear after the first `n` chunks of the line where `n` is
|
| - /// the index into the list.
|
| - final _suffixRules = <Set<Rule>>[];
|
| + /// The lowest cost solution found so far.
|
| + SolveState _bestSolution;
|
|
|
| /// Creates a new splitter for [_writer] that tries to fit [chunks] into the
|
| /// page width.
|
| - LineSplitter(this._writer, this._chunks, this._blockIndentation);
|
| + LineSplitter(this._writer, List<Chunk> chunks, int blockIndentation,
|
| + int firstLineIndent,
|
| + {bool flushLeft: false})
|
| + : _chunks = chunks,
|
| + // Collect the set of soft rules that we need to select values for.
|
| + _rules = chunks
|
| + .map((chunk) => chunk.rule)
|
| + .where((rule) => rule != null && rule is! HardSplitRule)
|
| + .toSet()
|
| + .toList(growable: false),
|
| + _blockIndentation = blockIndentation,
|
| + _firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
|
| + // Store the rule's index in the rule so we can get from a chunk to a rule
|
| + // index quickly.
|
| + for (var i = 0; i < _rules.length; i++) {
|
| + _rules[i].index = i;
|
| + }
|
| + }
|
|
|
| - /// Convert the line to a [String] representation.
|
| + /// Determine the best way to split the chunks into lines that fit in the
|
| + /// page, if possible.
|
| ///
|
| - /// It will determine how best to split it into multiple lines of output and
|
| - /// write the result to [writer].
|
| + /// Returns a [SplitSet] that defines where each split occurs and the
|
| + /// indentation of each line.
|
| ///
|
| /// [firstLineIndent] is the number of characters of whitespace to prefix the
|
| /// first line of output with.
|
| - SplitSolution apply(int firstLineIndent, {bool flushLeft}) {
|
| - if (debug.traceSplitter) {
|
| - debug.log(debug.green("\nSplitting:"));
|
| - debug.dumpChunks(0, _chunks);
|
| - debug.log();
|
| - }
|
| + SplitSet apply() {
|
| + // Start with a completely unbound, unsplit solution.
|
| + _workList.add(new SolveState(this, new RuleSet(_rules.length)));
|
|
|
| - // Ignore the trailing rule on the last chunk since it isn't used for
|
| - // anything.
|
| - var ruleChunks = _chunks.take(_chunks.length - 1);
|
| + var attempts = 0;
|
| + while (!_workList.isEmpty) {
|
| + var state = _workList.removeFirst();
|
|
|
| - // Pre-calculate the set of rules appear before and after each length. We
|
| - // use these frequently when creating [LinePrefix]es and they only depend
|
| - // on the length, so we can cache them up front.
|
| - for (var i = 0; i < _chunks.length; i++) {
|
| - _prefixRules.add(ruleChunks.take(i).map((chunk) => chunk.rule).toSet());
|
| - _suffixRules.add(ruleChunks.skip(i).map((chunk) => chunk.rule).toSet());
|
| - }
|
| + if (state.isBetterThan(_bestSolution)) {
|
| + _bestSolution = state;
|
|
|
| - var prefix = new LinePrefix(firstLineIndent + _blockIndentation,
|
| - flushLeft: flushLeft);
|
| - var solution = new SplitSolution(prefix);
|
| - _tryChunkRuleValues(solution, prefix);
|
| - return solution;
|
| - }
|
| + // Since we sort solutions by cost the first solution we find that
|
| + // fits is the winner.
|
| + if (_bestSolution.overflowChars == 0) break;
|
| + }
|
|
|
| - /// Finds the best set of splits to apply to the remainder of the chunks
|
| - /// following [prefix].
|
| - ///
|
| - /// This can only be called for a suffix that begins a new line. (In other
|
| - /// words, the last chunk in the prefix cannot be unsplit.)
|
| - SplitSet _findBestSplits(LinePrefix prefix) {
|
| - // Use the memoized result if we have it.
|
| - if (_bestSplits.containsKey(prefix)) {
|
| if (debug.traceSplitter) {
|
| - debug.log("memoized splits for $prefix = ${_bestSplits[prefix]}");
|
| + var best = state == _bestSolution ? " (best)" : "";
|
| + debug.log("$state$best");
|
| + debug.dumpLines(_chunks, _firstLineIndent, state.splits);
|
| + debug.log();
|
| }
|
| - return _bestSplits[prefix];
|
| - }
|
|
|
| - if (debug.traceSplitter) {
|
| - debug.log("find splits for $prefix");
|
| - debug.indent();
|
| - }
|
| + if (attempts++ > _maxAttempts) break;
|
|
|
| - var solution = new SplitSolution(prefix);
|
| - _tryChunkRuleValues(solution, prefix);
|
| + // Try bumping the rule values for rules whose chunks are on long lines.
|
| + state.expand();
|
| + }
|
|
|
| if (debug.traceSplitter) {
|
| - debug.unindent();
|
| - debug.log("best splits for $prefix = ${solution.splits}");
|
| + debug.log("$_bestSolution (winner)");
|
| + debug.dumpLines(_chunks, _firstLineIndent, _bestSolution.splits);
|
| + debug.log();
|
| }
|
|
|
| - return _bestSplits[prefix] = solution.splits;
|
| + return _bestSolution.splits;
|
| }
|
| +}
|
|
|
| - /// Updates [solution] with the best rule value selection for the chunk
|
| - /// immediately following [prefix].
|
| - void _tryChunkRuleValues(SplitSolution solution, LinePrefix prefix) {
|
| - // If we made it to the end, this prefix can be solved without splitting
|
| - // any chunks.
|
| - if (prefix.length == _chunks.length - 1) {
|
| - solution.update(this, new SplitSet());
|
| - return;
|
| +/// A possibly incomplete solution in the line splitting search space.
|
| +///
|
| +/// A single [SolveState] binds some subset of the rules to values while
|
| +/// leaving the rest unbound. If every rule is bound, the solve state describes
|
| +/// a complete solution to the line splitting problem. Even if rules are
|
| +/// unbound, a state can also usually be used as a solution by treating all
|
| +/// unbound rules as unsplit. (The usually is because a state that constrains
|
| +/// an unbound rule to split can't be used with that rule unsplit.)
|
| +///
|
| +/// From a given solve state, we explore the search tree by producing new, more
|
| +/// refined solve states that bind additional rules on top of the current
|
| +/// state's bindings.
|
| +class SolveState implements Comparable<SolveState> {
|
| + final LineSplitter _splitter;
|
| + final RuleSet _ruleValues;
|
| +
|
| +  /// The unbound rules in this state that can be bound to produce new, more
|
| + /// refined states.
|
| + ///
|
| +  /// Keeping this set small is the key to making the entire line splitter
|
| +  /// perform well. If we consider too many rules at each state, our
|
| +  /// exploration of the solution space is too branchy and we waste time on
|
| +  /// dead-end solutions.
|
| + ///
|
| + /// Here is the key insight. The line splitter treats any unbound rule as
|
| +  /// being unsplit. This means refining a solution always involves taking a rule
|
| + /// that is unsplit and making it split. That monotonically increases the
|
| + /// cost, but may help fit the solution inside the page.
|
| + ///
|
| + /// We want to keep the cost low, so the only reason to consider making a
|
| + /// rule split is if it reduces an overflowing line. It's also the case that
|
| + /// splitting an earlier rule will often reshuffle the rest of the line.
|
| + ///
|
| + /// Taking that into account, the only rules we consider binding to extend a
|
| + /// solve state are *unbound rules inside the first line that is overflowing*.
|
| + /// Even if a line has dozens of rules, this generally keeps the branching
|
| + /// down to a few. It also means rules inside lines that already fit are
|
| + /// never touched.
|
| + ///
|
| +  /// There is one other set of rules that goes in here. Sometimes a bound rule
|
| + /// in the solve state constrains some other unbound rule to split. In that
|
| +  /// case, we also consider that rule live so we know not to leave it at zero.
|
| + final _liveRules = new Set<Rule>();
|
| +
|
| + /// The set of splits chosen for this state.
|
| + SplitSet get splits => _splits;
|
| + SplitSet _splits;
|
| +
|
| + /// The number of characters that do not fit inside the page with this set of
|
| + /// splits.
|
| + int get overflowChars => _overflowChars;
|
| + int _overflowChars;
|
| +
|
| + /// Whether we can treat this state as a complete solution by leaving its
|
| + /// unbound rules unsplit.
|
| + ///
|
| + /// This is generally true but will be false if the state contains any
|
| + /// unbound rules that are constrained to not be zero by other bound rules.
|
| + /// This avoids picking a solution that leaves those rules at zero when they
|
| + /// aren't allowed to be.
|
| + bool _isComplete = true;
|
| +
|
| + SolveState(this._splitter, this._ruleValues) {
|
| + _calculateSplits();
|
| + _calculateCost();
|
| + }
|
| +
|
| + /// Orders this state relative to [other].
|
| + ///
|
| + /// This is the best-first ordering that the [LineSplitter] uses in its
|
| + /// worklist. It prefers cheaper states even if they overflow because this
|
| + /// ensures it finds the best solution first as soon as it finds one that
|
| + /// fits in the page so it can early out.
|
| + int compareTo(SolveState other) {
|
| + // TODO(rnystrom): It may be worth sorting by the estimated lowest number
|
| + // of overflow characters first. That doesn't help in cases where there is
|
| + // a solution that fits, but may help in corner cases where there is no
|
| + // fitting solution.
|
| +
|
| + if (splits.cost != other.splits.cost) {
|
| + return splits.cost.compareTo(other.splits.cost);
|
| }
|
|
|
| - var chunk = _chunks[prefix.length];
|
| + if (overflowChars != other.overflowChars) {
|
| + return overflowChars.compareTo(other.overflowChars);
|
| + }
|
|
|
| - // See if we've already selected a value for the rule.
|
| - var value = prefix.ruleValues[chunk.rule];
|
| + // Distinguish states based on the rule values just so that states with the
|
| +    // same cost and overflow but different rule values don't get treated as
|
| +    // identical by HeapPriorityQueue.
|
| + for (var rule in _splitter._rules) {
|
| + var value = _ruleValues.getValue(rule);
|
| + var otherValue = other._ruleValues.getValue(rule);
|
|
|
| - if (value == null) {
|
| - // No, so try every possible value for the rule.
|
| - for (value = 0; value < chunk.rule.numValues; value++) {
|
| - _tryRuleValue(solution, prefix, value);
|
| - }
|
| - } else if (value == -1) {
|
| - // A -1 "value" means, "any non-zero value". In other words, the rule has
|
| - // to split somehow, but can split however it chooses.
|
| - for (value = 1; value < chunk.rule.numValues; value++) {
|
| - _tryRuleValue(solution, prefix, value);
|
| - }
|
| - } else {
|
| - // Otherwise, it's constrained to a single value, so use it.
|
| - _tryRuleValue(solution, prefix, value);
|
| + if (value != otherValue) return value.compareTo(otherValue);
|
| }
|
| - }
|
|
|
| - /// Updates [solution] with the best solution that can be found by setting
|
| - /// the chunk after [prefix]'s rule to [value].
|
| - void _tryRuleValue(SplitSolution solution, LinePrefix prefix, int value) {
|
| - // If we already have an unbeatable solution, don't bother trying this one.
|
| - if (solution.cost == 0) return;
|
| + // If we get here, this state is identical to [other].
|
| + return 0;
|
| + }
|
|
|
| - var chunk = _chunks[prefix.length];
|
| + /// Returns `true` if this state is a better solution to use as the final
|
| + /// result than [other].
|
| + bool isBetterThan(SolveState other) {
|
| + // If this state contains an unbound rule that we know can't be left
|
| + // unsplit, we can't pick this as a solution.
|
| + if (!_isComplete) return false;
|
|
|
| - if (chunk.rule.isSplit(value, chunk)) {
|
| - // The chunk is splitting in an expression, so try all of the possible
|
| - // nesting combinations.
|
| - var ruleValues = _advancePrefix(prefix, value);
|
| - var longerPrefixes = prefix.split(chunk, _blockIndentation, ruleValues);
|
| - for (var longerPrefix in longerPrefixes) {
|
| - _tryLongerPrefix(solution, prefix, longerPrefix);
|
| + // Anything is better than nothing.
|
| + if (other == null) return true;
|
|
|
| - // If we find an unbeatable solution, don't try any more.
|
| - if (solution.cost == 0) break;
|
| - }
|
| - } else {
|
| - // We didn't split here, so add this chunk and its rule value to the
|
| - // prefix and continue on to the next.
|
| - var extended = prefix.extend(_advancePrefix(prefix, value));
|
| - _tryChunkRuleValues(solution, extended);
|
| + // Prefer the solution that fits the most in the page.
|
| + if (overflowChars != other.overflowChars) {
|
| + return overflowChars < other.overflowChars;
|
| }
|
| - }
|
| -
|
| - /// Updates [solution] with the solution for [prefix] assuming it uses
|
| - /// [longerPrefix] for the next chunk.
|
| - void _tryLongerPrefix(
|
| - SplitSolution solution, LinePrefix prefix, LinePrefix longerPrefix) {
|
| - var remaining = _findBestSplits(longerPrefix);
|
|
|
| - // If it wasn't possible to split the suffix given this nesting stack,
|
| - // skip it.
|
| - if (remaining == null) return;
|
| -
|
| - solution.update(this, remaining.add(prefix.length, longerPrefix.column));
|
| + // Otherwise, prefer the best cost.
|
| + return splits.cost < other.splits.cost;
|
| }
|
|
|
| - /// Determines the set of rule values for a new [LinePrefix] one chunk longer
|
| - /// than [prefix] whose rule on the new last chunk has [value].
|
| + /// Enqueues more solve states to consider based on this one.
|
| ///
|
| - /// Returns a map of [Rule]s to values for those rules for the values that
|
| - /// span the prefix and suffix of the [LinePrefix].
|
| - Map<Rule, int> _advancePrefix(LinePrefix prefix, int value) {
|
| - // Get the rules that appear in both in and after the new prefix. These are
|
| - // the rules that already have values that the suffix needs to honor.
|
| - var prefixRules = _prefixRules[prefix.length + 1];
|
| - var suffixRules = _suffixRules[prefix.length + 1];
|
| -
|
| - var nextRule = _chunks[prefix.length].rule;
|
| - var updatedValues = {};
|
| -
|
| - for (var prefixRule in prefixRules) {
|
| - var ruleValue =
|
| - prefixRule == nextRule ? value : prefix.ruleValues[prefixRule];
|
| -
|
| - if (suffixRules.contains(prefixRule)) {
|
| - // If the same rule appears in both the prefix and suffix, then preserve
|
| - // its exact value.
|
| - updatedValues[prefixRule] = ruleValue;
|
| + /// For each unbound rule in this state that occurred in the first long line,
|
| + /// enqueue solve states that bind that rule to each value it can have and
|
| + /// bind all previous rules to zero. (In other words, try all subsolutions
|
| + /// where that rule becomes the first new rule to split at.)
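|
| +  ///
|
| +  /// For example, if the first long line contains two unbound rules `a` and
|
| +  /// `b` (in that order), this enqueues states that bind `a` to each of its
|
| +  /// split values, plus states that bind `a` to zero and `b` to each of its
|
| +  /// split values.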
|
| + void expand() {
|
| + var unsplitRules = _ruleValues.clone();
|
| +
|
| + // Walk down the rules looking for unbound ones to try.
|
| + var triedRules = 0;
|
| + for (var rule in _splitter._rules) {
|
| + if (_liveRules.contains(rule)) {
|
| + // We found one worth trying, so try all of its values.
|
| + for (var value = 1; value < rule.numValues; value++) {
|
| + var boundRules = unsplitRules.clone();
|
| +
|
| + var mustSplitRules;
|
| + var valid = boundRules.tryBind(_splitter._rules, rule, value, (rule) {
|
| + if (mustSplitRules == null) mustSplitRules = [];
|
| + mustSplitRules.add(rule);
|
| + });
|
| +
|
| + // Make sure we don't violate the constraints of the bound rules.
|
| + if (!valid) continue;
|
| +
|
| + var state = new SolveState(_splitter, boundRules);
|
| +
|
| + // If some unbound rules are constrained to split, remember that.
|
| + if (mustSplitRules != null) {
|
| + state._isComplete = false;
|
| + state._liveRules.addAll(mustSplitRules);
|
| + }
|
| +
|
| + _splitter._workList.add(state);
|
| + }
|
| +
|
| + // Stop once we've tried all of the ones we can.
|
| + if (++triedRules == _liveRules.length) break;
|
| }
|
|
|
| - // If we haven't specified any value for this rule in the prefix, it
|
| - // doesn't place any constraint on the suffix.
|
| - if (ruleValue == null) continue;
|
| + // Fill in previous unbound rules with zero.
|
| + if (!_ruleValues.contains(rule)) {
|
| + // Pass a dummy callback because zero will never fail. (If it would
|
| + // have, that rule would already be bound to some other value.)
|
| + if (!unsplitRules.tryBind(_splitter._rules, rule, 0, (_) {})) {
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + }
|
|
|
| - // Enforce the constraints between rules.
|
| - for (var suffixRule in suffixRules) {
|
| - if (suffixRule == prefixRule) continue;
|
| + /// Calculates the [SplitSet] for this solve state, assuming any unbound
|
| + /// rules are set to zero.
|
| + void _calculateSplits() {
|
| + // Figure out which expression nesting levels got split and need to be
|
| + // assigned columns.
|
| + var usedNestingLevels = new Set();
|
| + for (var i = 0; i < _splitter._chunks.length - 1; i++) {
|
| + var chunk = _splitter._chunks[i];
|
| + if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) {
|
| + usedNestingLevels.add(chunk.nesting);
|
| + chunk.nesting.clearTotalUsedIndent();
|
| + }
|
| + }
|
|
|
| - // See if the prefix rule's value constrains any values in the suffix.
|
| - var value = prefixRule.constrain(ruleValue, suffixRule);
|
| + for (var nesting in usedNestingLevels) {
|
| + nesting.refreshTotalUsedIndent(usedNestingLevels);
|
| + }
|
|
|
| - // Also consider the backwards case, where a later rule in the suffix
|
| - // constrains a rule in the prefix.
|
| - if (value == null) {
|
| - value = suffixRule.reverseConstrain(ruleValue, prefixRule);
|
| + _splits = new SplitSet(_splitter._chunks.length);
|
| + for (var i = 0; i < _splitter._chunks.length - 1; i++) {
|
| + var chunk = _splitter._chunks[i];
|
| + if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) {
|
| + var indent = 0;
|
| + if (!chunk.flushLeftAfter) {
|
| + // Add in the chunk's indent.
|
| + indent = _splitter._blockIndentation + chunk.indent;
|
| +
|
| + // And any expression nesting.
|
| + indent += chunk.nesting.totalUsedIndent;
|
| }
|
|
|
| - if (value != null) {
|
| - updatedValues[prefixRule] = ruleValue;
|
| - updatedValues[suffixRule] = value;
|
| - }
|
| + _splits.add(i, indent);
|
| }
|
| }
|
| + }
|
| +
|
| + /// Gets the value to use for [rule], either the bound value or `0` if it
|
| + /// isn't bound.
|
| + int _getValue(Rule rule) {
|
| + if (rule is HardSplitRule) return 0;
|
|
|
| - return updatedValues;
|
| + return _ruleValues.getValue(rule);
|
| }
|
|
|
| /// Evaluates the cost (i.e. the relative "badness") of splitting the line
|
| /// into [lines] physical lines based on the current set of rules.
|
| - int _evaluateCost(LinePrefix prefix, SplitSet splits) {
|
| - assert(splits != null);
|
| + void _calculateCost() {
|
| + assert(_splits != null);
|
|
|
| // Calculate the length of each line and apply the cost of any spans that
|
| // get split.
|
| var cost = 0;
|
| - var length = prefix.column;
|
| + _overflowChars = 0;
|
| +
|
| + var length = _splitter._firstLineIndent;
|
|
|
| - var splitRules = new Set();
|
| + // The unbound rules in use by the current line. This will be null after
|
| + // the first long line has completed.
|
| + var currentLineRules = [];
|
|
|
| - endLine() {
|
| - // Punish lines that went over the length. We don't rule these out
|
| - // completely because it may be that the only solution still goes over
|
| - // (for example with long string literals).
|
| - if (length > _writer.pageWidth) {
|
| - cost += (length - _writer.pageWidth) * Cost.overflowChar;
|
| + endLine(int end) {
|
| + // Track lines that went over the length. It is only rules contained in
|
| + // long lines that we may want to split.
|
| + if (length > _splitter._writer.pageWidth) {
|
| + _overflowChars += length - _splitter._writer.pageWidth;
|
| +
|
| + // Only try rules that are in the first long line, since we know at
|
| + // least one of them *will* be split.
|
| + if (currentLineRules != null && currentLineRules.isNotEmpty) {
|
| + _liveRules.addAll(currentLineRules);
|
| + currentLineRules = null;
|
| + }
|
| + } else {
|
| + // The line fit, so don't keep track of its rules.
|
| + if (currentLineRules != null) {
|
| + currentLineRules.clear();
|
| + }
|
| }
|
| }
|
|
|
| @@ -352,131 +438,89 @@ class LineSplitter {
|
| // one split occurs in it.
|
| var splitSpans = new Set();
|
|
|
| - for (var i = prefix.length; i < _chunks.length; i++) {
|
| - var chunk = _chunks[i];
|
| + for (var i = 0; i < _splitter._chunks.length; i++) {
|
| + var chunk = _splitter._chunks[i];
|
|
|
| length += chunk.text.length;
|
|
|
| - if (i < _chunks.length - 1) {
|
| - if (splits.shouldSplitAt(i)) {
|
| - endLine();
|
| + // Ignore the split after the last chunk.
|
| + if (i == _splitter._chunks.length - 1) break;
|
|
|
| - splitSpans.addAll(chunk.spans);
|
| + if (_splits.shouldSplitAt(i)) {
|
| + endLine(i);
|
|
|
| - if (chunk.rule != null && !splitRules.contains(chunk.rule)) {
|
| - // Don't double-count rules if multiple splits share the same
|
| - // rule.
|
| - splitRules.add(chunk.rule);
|
| - cost += chunk.rule.cost;
|
| - }
|
| -
|
| - // Include the cost of the nested block.
|
| - if (chunk.blockChunks.isNotEmpty) {
|
| - cost += _writer.formatBlock(chunk, splits.getColumn(i)).cost;
|
| - }
|
| + splitSpans.addAll(chunk.spans);
|
|
|
| - // Start the new line.
|
| - length = splits.getColumn(i);
|
| - } else {
|
| - if (chunk.spaceWhenUnsplit) length++;
|
| + // Include the cost of the nested block.
|
| + if (chunk.blockChunks.isNotEmpty) {
|
| + cost +=
|
| + _splitter._writer.formatBlock(chunk, _splits.getColumn(i)).cost;
|
| + }
|
|
|
| - // Include the nested block inline, if any.
|
| - length += chunk.unsplitBlockLength;
|
| + // Start the new line.
|
| + length = _splits.getColumn(i);
|
| + } else {
|
| + if (chunk.spaceWhenUnsplit) length++;
|
| +
|
| + // Include the nested block inline, if any.
|
| + length += chunk.unsplitBlockLength;
|
| +
|
| + // If we might be in the first overly long line, keep track of any
|
| + // unbound rules we encounter. These are ones that we'll want to try to
|
| + // bind to shorten the long line.
|
| + if (currentLineRules != null &&
|
| + chunk.rule != null &&
|
| + !chunk.isHardSplit &&
|
| + !_ruleValues.contains(chunk.rule)) {
|
| + currentLineRules.add(chunk.rule);
|
| }
|
| }
|
| }
|
|
|
| + // Add the costs for the rules that split.
|
| + _ruleValues.forEach(_splitter._rules, (rule, value) {
|
| + // A rule may be bound to zero if another rule constrains it to not split.
|
| + if (value != 0) cost += rule.cost;
|
| + });
|
| +
|
| // Add the costs for the spans containing splits.
|
| for (var span in splitSpans) cost += span.cost;
|
|
|
| // Finish the last line.
|
| - endLine();
|
| + endLine(_splitter._chunks.length);
|
|
|
| - return cost;
|
| + _splits.setCost(cost);
|
| }
|
| -}
|
| -
|
| -/// Keeps track of the best set of splits found so far for a suffix of some
|
| -/// prefix.
|
| -class SplitSolution {
|
| - /// The prefix whose suffix we are finding a solution for.
|
| - final LinePrefix _prefix;
|
| -
|
| - /// The best set of splits currently found.
|
| - SplitSet get splits => _bestSplits;
|
| - SplitSet _bestSplits;
|
| -
|
| - /// The lowest cost currently found.
|
| - int get cost => _lowestCost;
|
| - int _lowestCost;
|
| -
|
| - /// Whether a solution that fits within a page has been found yet.
|
| - bool get isAdequate => _lowestCost != null && _lowestCost < Cost.overflowChar;
|
| -
|
| - SplitSolution(this._prefix);
|
|
|
| - /// Compares [splits] to the best solution found so far and keeps it if it's
|
| - /// better.
|
| - void update(LineSplitter splitter, SplitSet splits) {
|
| - var cost = splitter._evaluateCost(_prefix, splits);
|
| + String toString() {
|
| + var buffer = new StringBuffer();
|
|
|
| - if (_lowestCost == null || cost < _lowestCost) {
|
| - _bestSplits = splits;
|
| - _lowestCost = cost;
|
| - }
|
| + buffer.writeAll(
|
| + _splitter._rules.map((rule) {
|
| + var valueLength = "${rule.fullySplitValue}".length;
|
|
|
| - if (debug.traceSplitter) {
|
| - var best = _bestSplits == splits ? " (best)" : "";
|
| - debug.log(debug.gray("$_prefix $splits \$$cost$best"));
|
| - debug.dumpLines(splitter._chunks, _prefix, splits);
|
| - debug.log();
|
| - }
|
| - }
|
| -}
|
| + var value = "?";
|
| + if (_ruleValues.contains(rule)) {
|
| + value = "${_ruleValues.getValue(rule)}";
|
| + }
|
|
|
| -/// An immutable, persistent set of enabled split [Chunk]s.
|
| -///
|
| -/// For each chunk, this tracks if it has been split and, if so, what the
|
| -/// chosen column is for the following line.
|
| -///
|
| -/// Internally, this uses a sparse parallel list where each element corresponds
|
| -/// to the column of the chunk at that index in the chunk list, or `null` if
|
| -/// there is no active split there. This had about a 10% perf improvement over
|
| -/// using a [Set] of splits or a persistent linked list of split index/indent
|
| -/// pairs.
|
| -class SplitSet {
|
| - List<int> _columns;
|
| -
|
| - /// Creates a new empty split set.
|
| - SplitSet() : this._(const []);
|
| -
|
| - SplitSet._(this._columns);
|
| -
|
| - /// Returns a new [SplitSet] containing the union of this one and the split
|
| - /// at [index] with next line starting at [column].
|
| - SplitSet add(int index, int column) {
|
| - var newIndents = new List(math.max(index + 1, _columns.length));
|
| - newIndents.setAll(0, _columns);
|
| - newIndents[index] = column;
|
| -
|
| - return new SplitSet._(newIndents);
|
| - }
|
| + value = value.padLeft(valueLength);
|
| + if (_liveRules.contains(rule)) {
|
| + value = debug.bold(value);
|
| + } else {
|
| + value = debug.gray(value);
|
| + }
|
|
|
| - /// Returns `true` if the chunk at [splitIndex] should be split.
|
| - bool shouldSplitAt(int index) =>
|
| - index < _columns.length && _columns[index] != null;
|
| + return value;
|
| + }),
|
| + " ");
|
|
|
| - /// Gets the zero-based starting column for the chunk at [index].
|
| - int getColumn(int index) => _columns[index];
|
| + buffer.write(" \$${splits.cost}");
|
|
|
| - String toString() {
|
| - var result = [];
|
| - for (var i = 0; i < _columns.length; i++) {
|
| - if (_columns[i] != null) {
|
| - result.add("$i:${_columns[i]}");
|
| - }
|
| - }
|
| + if (overflowChars > 0) buffer.write(" (${overflowChars} over)");
|
| + if (!_isComplete) buffer.write(" (incomplete)");
|
| + if (splits == null) buffer.write(" invalid");
|
|
|
| - return result.join(" ");
|
| + return buffer.toString();
|
| }
|
| }
|
|
|