tests/tokenizer_test.dart - Issue 10916294: switch html5lib to new pkg layout

Unified Diff: tests/tokenizer_test.dart

Issue 10916294: switch html5lib to new pkg layout (Closed) Base URL: https://github.com/dart-lang/html5lib.git@master

Patch Set: Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tests/tokenizer_test.dart

diff --git a/tests/tokenizer_test.dart b/tests/tokenizer_test.dart

deleted file mode 100644

index 61b6462e3cd23d66a8cdb7b2a341634a880d4119..0000000000000000000000000000000000000000

--- a/tests/tokenizer_test.dart

+++ /dev/null

@@ -1,304 +0,0 @@

-#library('tokenizer_test');

-// Note: mirrors used to match the getattr usage in the original test

-#import('dart:io');

-#import('dart:json');

-#import('dart:mirrors');

-#import('package:unittest/unittest.dart');

-#import('package:unittest/vm_config.dart');

-#import('../lib/char_encodings.dart');

-#import('../lib/constants.dart', prefix: 'constants');

-#import('../lib/token.dart');

-#import('../lib/utils.dart');

-#import('../tokenizer.dart');

-#import('support.dart');

-/**

- * This is like [JSON.parse], but it fixes unicode surrogate pairs in the JSON.

- *

- * Without this, the test "expects" incorrect results from the tokenizer.

- * Note: Python's json module decodes these correctly, so this might point at

- * a bug in Dart's [JSON.parse].

- */

-jsonParseUnicode(String input) => jsonFixSurrogatePairs(JSON.parse(input));

-// TODO(jmesserly): this should probably be handled by dart:json

-jsonFixSurrogatePairs(jsonObject) {

- fixSurrogate(object) {

- if (object is String) {

- return decodeUtf16Surrogates(object);

- } else if (object is List) {

- List a = object;

- for (int i = 0; i < a.length; i++) {

- a[i] = fixSurrogate(a[i]);

- }

- } else if (object is Map) {

- Map<String, Object> m = object;

- m.forEach((key, value) {

- var fixedKey = fixSurrogate(key);

- var fixedValue = fixSurrogate(value);

- if (fixedKey !== key) {

- m.remove(key);

- m[fixedKey] = fixedValue;

- } else if (fixedValue !== value) {

- m[fixedKey] = fixedValue;

- }

- });

- }

- return object;

- }

- return fixSurrogate(jsonObject);

-class TokenizerTestParser {

- String _state;

- var _lastStartTag;

- List outputTokens;

- TokenizerTestParser(String initialState, [lastStartTag])

- : _state = initialState,

- _lastStartTag = lastStartTag;

- List parse(stream, [encoding, innerHTML = false]) {

- var tokenizer = new HTMLTokenizer(stream, encoding);

- outputTokens = [];

- // Note: we can't get a closure of the state method. However, we can

- // create a new closure to invoke it via mirrors.

- var mtok = reflect(tokenizer);

- tokenizer.state = () => mtok.invoke(_state, const []).value.reflectee;

- if (_lastStartTag != null) {

- tokenizer.currentToken = new StartTagToken(_lastStartTag);

- }

- while (tokenizer.hasNext()) {

- var token = tokenizer.next();

- switch (token.kind) {

- case TokenKind.characters:

- processCharacters(token);

- break;

- case TokenKind.spaceCharacters:

- processSpaceCharacters(token);

- break;

- case TokenKind.startTag:

- processStartTag(token);

- break;

- case TokenKind.endTag:

- processEndTag(token);

- break;

- case TokenKind.comment:

- processComment(token);

- break;

- case TokenKind.doctype:

- processDoctype(token);

- break;

- case TokenKind.parseError:

- processParseError(token);

- break;

- }

- return outputTokens;

- }

- void processDoctype(DoctypeToken token) {

- outputTokens.add(["DOCTYPE", token.name, token.publicId,

- token.systemId, token.correct]);

- }

- void processStartTag(StartTagToken token) {

- outputTokens.add(["StartTag", token.name,

- makeDict(token.data), token.selfClosing]);

- }

- void processEndTag(EndTagToken token) {

- outputTokens.add(["EndTag", token.name, token.selfClosing]);

- }

- void processComment(Token token) {

- outputTokens.add(["Comment", token.data]);

- }

- void processSpaceCharacters(Token token) {

- processCharacters(token);

- }

- void processCharacters(Token token) {

- outputTokens.add(["Character", token.data]);

- }

- void processEOF(token) {

- }

- void processParseError(Token token) {

- // TODO(jmesserly): when debugging test failures it can be useful to add

- // logging here like `print('ParseError $token');`. It would be nice to

- // use the actual logging library.

- outputTokens.add(["ParseError", token.data]);

- }

-List concatenateCharacterTokens(List tokens) {

- var outputTokens = <List>[];

- for (var token in tokens) {

- if (token.indexOf("ParseError") == -1 && token[0] == "Character") {

- if (outputTokens.length > 0 &&

- outputTokens.last().indexOf("ParseError") == -1 &&

- outputTokens.last()[0] == "Character") {

- outputTokens.last()[1] = '${outputTokens.last()[1]}${token[1]}';

- } else {

- outputTokens.add(token);

- }

- } else {

- outputTokens.add(token);

- }

- return outputTokens;

-List normalizeTokens(List tokens) {

- // TODO: convert tests to reflect arrays

- for (int i = 0; i < tokens.length; i++) {

- var token = tokens[i];

- if (token[0] == 'ParseError') {

- tokens[i] = token[0];

- }

- return tokens;

-/**

- * Test whether the test has passed or failed

- *

- * If the ignoreErrorOrder flag is set to true we don't test the relative

- * positions of parse errors and non parse errors.

- */

-void expectTokensMatch(List expectedTokens, List receivedTokens,

- bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {

- var checkSelfClosing = false;

- for (var token in expectedTokens) {

- if (token[0] == "StartTag" && token.length == 4

- || token[0] == "EndTag" && token.length == 3) {

- checkSelfClosing = true;

- break;

- }

- if (!checkSelfClosing) {

- for (var token in receivedTokens) {

- if (token[0] == "StartTag" || token[0] == "EndTag") {

- token.removeLast();

- }

- if (!ignoreErrorOrder && !ignoreErrors) {

- expect(receivedTokens, equals(expectedTokens), message);

- } else {

- // Sort the tokens into two groups; non-parse errors and parse errors

- var expectedParseErrors = expectedTokens.filter((t) => t == "ParseError");

- var expectedNonErrors = expectedTokens.filter((t) => t != "ParseError");

- var receivedParseErrors = receivedTokens.filter((t) => t == "ParseError");

- var receivedNonErrors = receivedTokens.filter((t) => t != "ParseError");

- expect(receivedNonErrors, equals(expectedNonErrors), message);

- if (!ignoreErrors) {

- expect(receivedParseErrors, equals(expectedParseErrors), message);

- }

-void runTokenizerTest(Map testInfo) {

- // XXX - move this out into the setup function

- // concatenate all consecutive character tokens into a single token

- if (testInfo.containsKey('doubleEscaped')) {

- testInfo = unescape(testInfo);

- }

- var expected = concatenateCharacterTokens(testInfo['output']);

- if (!testInfo.containsKey('lastStartTag')) {

- testInfo['lastStartTag'] = null;

- }

- var parser = new TokenizerTestParser(testInfo['initialState'],

- testInfo['lastStartTag']);

- var tokens = parser.parse(testInfo['input']);

- tokens = concatenateCharacterTokens(tokens);

- var received = normalizeTokens(tokens);

- var errorMsg = Strings.join(["\n\nInitial state:",

- testInfo['initialState'],

- "\nInput:", testInfo['input'],

- "\nExpected:", expected,

- "\nreceived:", tokens].map((s) => '$s'), '\n');

- var ignoreErrorOrder = testInfo['ignoreErrorOrder'];

- if (ignoreErrorOrder == null) ignoreErrorOrder = false;

- expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);

-Map unescape(Map testInfo) {

- // Note: using JSON.parse to unescape the unicode characters in the string.

- decode(inp) => jsonParseUnicode('"${inp}"');

- testInfo["input"] = decode(testInfo["input"]);

- for (var token in testInfo["output"]) {

- if (token == "ParseError") {

- continue;

- } else {

- token[1] = decode(token[1]);

- if (token.length > 2) {

- for (var pair in token[2]) {

- var key = pair[0];

- var value = pair[1];

- token[2].remove(key);

- token[2][decode(key)] = decode(value);

- }

- return testInfo;

-String camelCase(String s) {

- s = s.toLowerCase();

- var result = new StringBuffer();

- for (var match in const RegExp(@"\W+(\w)(\w+)").allMatches(s)) {

- if (result.length == 0) result.add(s.substring(0, match.start()));

- result.add(match.group(1).toUpperCase());

- result.add(match.group(2));

- }

- return result.toString();

-void main() {

- useVmConfiguration();

- getDataFiles('tokenizer', (p) => p.endsWith('.test')).then((files) {

- for (var path in files) {

- var text = new File(path).readAsTextSync();

- var tests = jsonParseUnicode(text);

- var testName = new Path.fromNative(path).filename.replaceAll(".test","");

- var testList = tests['tests'];

- if (testList == null) continue;

- group(testName, () {

- for (int index = 0; index < testList.length; index++) {

- final testInfo = testList[index];

- testInfo.putIfAbsent("initialStates", () => ["Data state"]);

- for (var initialState in testInfo["initialStates"]) {

- test(testInfo["description"], () {

- testInfo["initialState"] = camelCase(initialState);

- runTokenizerTest(testInfo);

- });

- }

- });

- }

- });

« no previous file with comments | « tests/support.dart ('k') | tokenizer.dart » ('j') | no next file with comments »