Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1183)

Side by Side Diff: lib/src/chunk_builder.dart

Issue 1255643002: New, simpler and faster line splitter. (Closed) Base URL: https://github.com/dart-lang/dart_style.git@master
Patch Set: Optimize nesting. Reformat. Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/src/chunk.dart ('k') | lib/src/debug.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 library dart_style.src.chunk_builder; 5 library dart_style.src.chunk_builder;
6 6
7 import 'chunk.dart'; 7 import 'chunk.dart';
8 import 'dart_formatter.dart'; 8 import 'dart_formatter.dart';
9 import 'debug.dart' as debug; 9 import 'debug.dart' as debug;
10 import 'line_splitter.dart';
11 import 'line_writer.dart'; 10 import 'line_writer.dart';
12 import 'nesting.dart'; 11 import 'nesting_level.dart';
13 import 'nesting_builder.dart'; 12 import 'nesting_builder.dart';
14 import 'rule/rule.dart'; 13 import 'rule/rule.dart';
15 import 'source_code.dart'; 14 import 'source_code.dart';
16 import 'whitespace.dart'; 15 import 'whitespace.dart';
17 16
18 /// Takes the incremental serialized output of [SourceVisitor]--the source text 17 /// Takes the incremental serialized output of [SourceVisitor]--the source text
19 /// along with any comments and preserved whitespace--and produces a coherent 18 /// along with any comments and preserved whitespace--and produces a coherent
20 /// tree of [Chunk]s which can then be split into physical lines. 19 /// tree of [Chunk]s which can then be split into physical lines.
21 /// 20 ///
22 /// Keeps track of leading indentation, expression nesting, and all of the hairy 21 /// Keeps track of leading indentation, expression nesting, and all of the hairy
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after
360 for (var i = openSpan.start; i < end; i++) { 359 for (var i = openSpan.start; i < end; i++) {
361 var chunk = _chunks[i]; 360 var chunk = _chunks[i];
362 if (!chunk.isHardSplit) chunk.spans.add(span); 361 if (!chunk.isHardSplit) chunk.spans.add(span);
363 } 362 }
364 } 363 }
365 364
366 /// Starts a new [Rule]. 365 /// Starts a new [Rule].
367 /// 366 ///
368 /// If omitted, defaults to a new [SimpleRule]. 367 /// If omitted, defaults to a new [SimpleRule].
369 void startRule([Rule rule]) { 368 void startRule([Rule rule]) {
370 //assert(_pendingRule == null);
371
372 if (rule == null) rule = new SimpleRule(); 369 if (rule == null) rule = new SimpleRule();
373 370
374 // See if any of the rules that contain this one care if it splits. 371 // See if any of the rules that contain this one care if it splits.
375 _rules.forEach((outer) => outer.contain(rule)); 372 _rules.forEach((outer) => outer.contain(rule));
376 _rules.add(rule); 373 _rules.add(rule);
377
378 // Keep track of the rule's chunk range so we know how to calculate its
379 // length for preemption.
380 rule.start = _currentChunkIndex;
381 } 374 }
382 375
383 /// Starts a new [Rule] that comes into play *after* the next whitespace 376 /// Starts a new [Rule] that comes into play *after* the next whitespace
384 /// (including comments) is written. 377 /// (including comments) is written.
385 /// 378 ///
386 /// This is used for binary operators who want to start a rule before the 379 /// This is used for binary operators who want to start a rule before the
387 /// first operand but not get forced to split if a comment appears before the 380 /// first operand but not get forced to split if a comment appears before the
388 /// entire expression. 381 /// entire expression.
389 /// 382 ///
390 /// If [rule] is omitted, defaults to a new [SimpleRule]. 383 /// If [rule] is omitted, defaults to a new [SimpleRule].
391 void startLazyRule([Rule rule]) { 384 void startLazyRule([Rule rule]) {
392 if (rule == null) rule = new SimpleRule(); 385 if (rule == null) rule = new SimpleRule();
393 386
394 _lazyRules.add(rule); 387 _lazyRules.add(rule);
395 } 388 }
396 389
397 /// Ends the innermost rule. 390 /// Ends the innermost rule.
398 void endRule() { 391 void endRule() {
399 // Keep track of the rule's chunk range so we know how to calculate its 392 _rules.removeLast();
400 // length for preemption.
401 _rules.removeLast().end = _currentChunkIndex;
402 } 393 }
403 394
404 /// Pre-emptively forces all of the current rules to become hard splits. 395 /// Pre-emptively forces all of the current rules to become hard splits.
405 /// 396 ///
406 /// This is called by [SourceVisitor] when it can determine that a rule will 397 /// This is called by [SourceVisitor] when it can determine that a rule will
407 /// will always be split. Turning it (and the surrounding rules) into hard 398 /// will always be split. Turning it (and the surrounding rules) into hard
408 /// splits lets the writer break the output into smaller pieces for the line 399 /// splits lets the writer break the output into smaller pieces for the line
409 /// splitter, which helps performance and avoids failing on very large input. 400 /// splitter, which helps performance and avoids failing on very large input.
410 /// 401 ///
411 /// In particular, it's easy for the visitor to know that collections with a 402 /// In particular, it's easy for the visitor to know that collections with a
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after
716 } else { 707 } else {
717 _chunks.add(new Chunk(text)); 708 _chunks.add(new Chunk(text));
718 } 709 }
719 } 710 }
720 711
721 /// Returns true if we can divide the chunks at [index] and line split the 712 /// Returns true if we can divide the chunks at [index] and line split the
722 /// ones before and after that separately. 713 /// ones before and after that separately.
723 bool _canDivideAt(int i) { 714 bool _canDivideAt(int i) {
724 var chunk = _chunks[i]; 715 var chunk = _chunks[i];
725 if (!chunk.isHardSplit) return false; 716 if (!chunk.isHardSplit) return false;
726 if (chunk.nesting.depth != 0) return false; 717 if (chunk.nesting.isNested) return false;
727 if (chunk.blockChunks.isNotEmpty) return false; 718 if (chunk.blockChunks.isNotEmpty) return false;
728 719
729 // Make sure we don't split the line in the middle of a rule. 720 // Make sure we don't split the line in the middle of a rule.
730 var chunks = _ruleChunks[chunk.rule]; 721 var chunks = _ruleChunks[chunk.rule];
731 if (chunks != null && chunks.any((other) => other > i)) return false; 722 if (chunks != null && chunks.any((other) => other > i)) return false;
732 723
733 return true; 724 return true;
734 } 725 }
735 726
736 /// Pre-processes the chunks after they are done being written by the visitor 727 /// Pre-processes the chunks after they are done being written by the visitor
737 /// but before they are run through the line splitter. 728 /// but before they are run through the line splitter.
738 /// 729 ///
739 /// Marks ranges of chunks that can be line split independently to keep the 730 /// Marks ranges of chunks that can be line split independently to keep the
740 /// batches we send to [LineSplitter] small. 731 /// batches we send to [LineSplitter] small.
741 void _divideChunks() { 732 void _divideChunks() {
742 // Harden all of the rules that we know get forced by containing hard 733 // Harden all of the rules that we know get forced by containing hard
743 // splits, along with all of the other rules they constrain. 734 // splits, along with all of the other rules they constrain.
744 _hardenRules(_hardSplitRules); 735 _hardenRules();
745
746 // For each independent set of chunks, see if there are any rules in them
747 // that we want to preemptively harden. This is basically to send smaller
748 // batches of chunks to LineSplitter in cases where the code is deeply
749 // nested or complex.
750 var preemptedRules = new Set();
751
752 var start = 0;
753 for (var i = 0; i < _chunks.length; i++) {
754 if (_canDivideAt(i)) {
755 _preemptRules(start, i, preemptedRules);
756 start = i;
757 }
758 }
759
760 if (start < _chunks.length) {
761 _preemptRules(start, _chunks.length, preemptedRules);
762 }
763
764 _hardenRules(preemptedRules);
765 736
766 // Now that we know where all of the divided chunk sections are, mark the 737 // Now that we know where all of the divided chunk sections are, mark the
767 // chunks. 738 // chunks.
768 for (var i = 0; i < _chunks.length; i++) { 739 for (var i = 0; i < _chunks.length; i++) {
769 _chunks[i].markDivide(_canDivideAt(i)); 740 _chunks[i].markDivide(_canDivideAt(i));
770 } 741 }
771 } 742 }
772 743
773 /// Removes any unused nesting levels from [chunks].
774 ///
775 /// The line splitter considers every possible combination of mapping
776 /// indentation to nesting levels when trying to find the best solution. For
777 /// example, it may assign 4 spaces of indentation to level 1, 8 spaces to
778 /// level 3, etc.
779 ///
780 /// It's fairly common for a nesting level to not actually appear at the
781 /// boundary of a chunk. The source visitor may enter more than one level of
782 /// nesting at a point where a split cannot happen. In that case, there's no
783 /// point in trying to assign an indentation level to that nesting level. It
784 /// will never be used because no line will begin at that level of
785 /// indentation.
786 ///
787 /// Worse, if the splitter *does* consider these levels, it can dramatically
788 /// increase solving time. We can't determine which nesting levels will get
789 /// used eagerly since a level may not be used until later. Instead, when
790 /// we're building all of the chunks, this goes through and renumbers the
791 /// nesting levels.
792 void _flattenNestingLevels(List<Chunk> chunks) {
793 if (chunks.isEmpty) return;
794
795 // Find all of the nesting levels that actually appear at the end of chunks.
796 var usedNestings = new Set();
797 for (var chunk in chunks) {
798 if (chunk.nesting == null) continue;
799 usedNestings.add(chunk.nesting);
800 }
801
802 // Now get rid of any others that didn't occur where a split happened.
803 for (var nesting in usedNestings) {
804 nesting.removeUnused(usedNestings);
805 }
806 }
807
808 /// Adds some rules used by chunks within [start] and [end] to
809 /// [preemptedRules] to force them become hard splits if it looks like
810 /// there's no other option to get a solution in reasonable time.
811 ///
812 /// In most cases, the formatter can find an ideal solution to a set of rules
813 /// in reasonable time. Splitting chunks into short lists, nested blocks, and
814 /// the memoization and block caching that [LineSplitter] does all help.
815 ///
816 /// However, some code isn't helped by any of that. In particular, a very
817 /// large, deeply nested expression that contains no collection or function
818 /// literals has to be solved all at once by the line splitter. Memoization
819 /// helps, but some expressions just have too many permutations.
820 ///
821 /// In practice, this almost always occurs in machine-generated code where
822 /// the output quality doesn't matter as much. To avoid getting stuck there,
823 /// this finds rules that surround more than a page of code and forces them
824 /// to fully split. It only does this if it thinks it won't find a reasonable
825 /// solution otherwise.
826 ///
827 /// This may discard a more optimal solution in some cases. When a rule is
828 /// hardened, it is *fully* hardened. There may have been a solution where
829 /// only some of a rule's chunks needed to be split (for example, not fully
830 /// breaking an argument list). This won't consider those kinds of solution
831 /// To avoid this, pre-emption only kicks in for lines that look like they
832 /// will be hard to solve directly.
833 void _preemptRules(int start, int end, Set<Rule> preemptedRules) {
834 var chunks = _chunks.sublist(start, end);
835 _flattenNestingLevels(chunks);
836
837 var rules = chunks
838 .map((chunk) => chunk.rule)
839 .where((rule) => rule != null && rule is! HardSplitRule)
840 .toSet();
841
842 var values = rules.fold(1, (value, rule) => value * rule.numValues);
843
844 // TODO(rnystrom): We could do something more precise here by taking the
845 // rule types into account.
846
847 // If the number of possible solutions is reasonable, don't preempt any.
848 if (values <= 32768) return;
849
850 // Find the rules that contain too much.
851 for (var rule in rules) {
852 var length = 0;
853 for (var i = rule.start + 1; i <= rule.end; i++) {
854 length += _chunks[i].length;
855 if (length > pageWidth) {
856 preemptedRules.add(rule);
857 break;
858 }
859 }
860 }
861 }
862
863 /// Hardens the active rules when a hard split occurs within them. 744 /// Hardens the active rules when a hard split occurs within them.
864 void _handleHardSplit() { 745 void _handleHardSplit() {
865 if (_rules.isEmpty) return; 746 if (_rules.isEmpty) return;
866 747
867 // If the current rule doesn't care, it will "eat" the hard split and no 748 // If the current rule doesn't care, it will "eat" the hard split and no
868 // others will care either. 749 // others will care either.
869 if (!_rules.last.splitsOnInnerRules) return; 750 if (!_rules.last.splitsOnInnerRules) return;
870 751
871 // Start with the innermost rule. This will traverse the other rules it 752 // Start with the innermost rule. This will traverse the other rules it
872 // constrains. 753 // constrains.
873 _hardSplitRules.add(_rules.last); 754 _hardSplitRules.add(_rules.last);
874 } 755 }
875 756
876 /// Replaces [rules] with hard splits, along with every rule that those 757 /// Replaces all of the previously hardened rules with hard splits, along
877 /// constrain to also split. 758 /// with every rule that those constrain to also split.
878 /// 759 ///
879 /// This should only be called after all chunks have been written. 760 /// This should only be called after all chunks have been written.
880 void _hardenRules(Set<Rule> rules) { 761 void _hardenRules() {
881 if (rules.isEmpty) return; 762 if (_hardSplitRules.isEmpty) return;
882 763
883 // Harden all of the rules that are constrained by [rules] as well. 764 // Harden all of the rules that are constrained by [rules] as well.
884 var hardenedRules = new Set(); 765 var hardenedRules = new Set();
885 walkConstraints(rule) { 766 walkConstraints(rule) {
886 if (hardenedRules.contains(rule)) return; 767 if (hardenedRules.contains(rule)) return;
887 hardenedRules.add(rule); 768 hardenedRules.add(rule);
888 769
889 // Follow this rule's constraints, recursively. 770 // Follow this rule's constraints, recursively.
890 for (var other in _ruleChunks.keys) { 771 for (var other in _ruleChunks.keys) {
891 if (other == rule) continue; 772 if (other == rule) continue;
892 773
893 if (rule.constrain(rule.fullySplitValue, other) == 774 if (rule.constrain(rule.fullySplitValue, other) ==
894 other.fullySplitValue) { 775 other.fullySplitValue) {
895 walkConstraints(other); 776 walkConstraints(other);
896 } 777 }
897 } 778 }
898 } 779 }
899 780
900 for (var rule in rules) { 781 for (var rule in _hardSplitRules) {
901 walkConstraints(rule); 782 walkConstraints(rule);
902 } 783 }
903 784
904 // Harden every chunk that uses one of these rules. 785 // Harden every chunk that uses one of these rules.
905 for (var chunk in _chunks) { 786 for (var chunk in _chunks) {
906 if (hardenedRules.contains(chunk.rule)) { 787 if (hardenedRules.contains(chunk.rule)) {
907 chunk.harden(); 788 chunk.harden();
908 } 789 }
909 } 790 }
910 } 791 }
911 } 792 }
OLDNEW
« no previous file with comments | « lib/src/chunk.dart ('k') | lib/src/debug.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698