Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(300)

Side by Side Diff: src/scanner-character-streams.h

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 18 matching lines...) Expand all
29 #define V8_SCANNER_CHARACTER_STREAMS_H_ 29 #define V8_SCANNER_CHARACTER_STREAMS_H_
30 30
31 #include "scanner.h" 31 #include "scanner.h"
32 32
33 namespace v8 { 33 namespace v8 {
34 namespace internal { 34 namespace internal {
35 35
36 // A buffered character stream based on a random access character 36 // A buffered character stream based on a random access character
37 // source (ReadBlock can be called with pos_ pointing to any position, 37 // source (ReadBlock can be called with pos_ pointing to any position,
38 // even positions before the current). 38 // even positions before the current).
39 class BufferedUC16CharacterStream: public UC16CharacterStream { 39 class BufferedUtf16CharacterStream: public Utf16CharacterStream {
40 public: 40 public:
41 BufferedUC16CharacterStream(); 41 BufferedUtf16CharacterStream();
42 virtual ~BufferedUC16CharacterStream(); 42 virtual ~BufferedUtf16CharacterStream();
43 43
44 virtual void PushBack(uc32 character); 44 virtual void PushBack(uc32 character);
45 45
46 protected: 46 protected:
47 static const unsigned kBufferSize = 512; 47 static const unsigned kBufferSize = 512;
48 static const unsigned kPushBackStepSize = 16; 48 static const unsigned kPushBackStepSize = 16;
49 49
50 virtual unsigned SlowSeekForward(unsigned delta); 50 virtual unsigned SlowSeekForward(unsigned delta);
51 virtual bool ReadBlock(); 51 virtual bool ReadBlock();
52 virtual void SlowPushBack(uc16 character); 52 virtual void SlowPushBack(uc16 character);
53 53
54 virtual unsigned BufferSeekForward(unsigned delta) = 0; 54 virtual unsigned BufferSeekForward(unsigned delta) = 0;
55 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; 55 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
56 56
57 const uc16* pushback_limit_; 57 const uc16* pushback_limit_;
58 uc16 buffer_[kBufferSize]; 58 uc16 buffer_[kBufferSize];
59 }; 59 };
60 60
61 61
62 // Generic string stream. 62 // Generic string stream.
63 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { 63 class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
64 public: 64 public:
65 GenericStringUC16CharacterStream(Handle<String> data, 65 GenericStringUtf16CharacterStream(Handle<String> data,
66 unsigned start_position, 66 unsigned start_position,
67 unsigned end_position); 67 unsigned end_position);
68 virtual ~GenericStringUC16CharacterStream(); 68 virtual ~GenericStringUtf16CharacterStream();
69 69
70 protected: 70 protected:
71 virtual unsigned BufferSeekForward(unsigned delta); 71 virtual unsigned BufferSeekForward(unsigned delta);
72 virtual unsigned FillBuffer(unsigned position, unsigned length); 72 virtual unsigned FillBuffer(unsigned position, unsigned length);
73 73
74 Handle<String> string_; 74 Handle<String> string_;
75 unsigned start_position_; 75 unsigned start_position_;
76 unsigned length_; 76 unsigned length_;
77 }; 77 };
78 78
79 79
80 // UC16 stream based on a literal UTF-8 string. 80 // Utf16 stream based on a literal UTF-8 string.
81 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { 81 class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
82 public: 82 public:
83 Utf8ToUC16CharacterStream(const byte* data, unsigned length); 83 Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
84 virtual ~Utf8ToUC16CharacterStream(); 84 virtual ~Utf8ToUtf16CharacterStream();
85 85
86 protected: 86 protected:
87 virtual unsigned BufferSeekForward(unsigned delta); 87 virtual unsigned BufferSeekForward(unsigned delta);
88 virtual unsigned FillBuffer(unsigned char_position, unsigned length); 88 virtual unsigned FillBuffer(unsigned char_position, unsigned length);
89 void SetRawPosition(unsigned char_position); 89 void SetRawPosition(unsigned char_position);
90 90
91 const byte* raw_data_; 91 const byte* raw_data_;
92 unsigned raw_data_length_; // Measured in bytes, not characters. 92 unsigned raw_data_length_; // Measured in bytes, not characters.
93 unsigned raw_data_pos_; 93 unsigned raw_data_pos_;
94 // The character position of the character at raw_data[raw_data_pos_]. 94 // The character position of the character at raw_data[raw_data_pos_].
95 // Not necessarily the same as pos_. 95 // Not necessarily the same as pos_.
96 unsigned raw_character_position_; 96 unsigned raw_character_position_;
97 }; 97 };
98 98
99 99
100 // UTF16 buffer to read characters from an external string. 100 // UTF16 buffer to read characters from an external string.
101 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { 101 class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
102 public: 102 public:
103 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, 103 ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
104 int start_position, 104 int start_position,
105 int end_position); 105 int end_position);
106 virtual ~ExternalTwoByteStringUC16CharacterStream(); 106 virtual ~ExternalTwoByteStringUtf16CharacterStream();
107 107
108 virtual void PushBack(uc32 character) { 108 virtual void PushBack(uc32 character) {
109 ASSERT(buffer_cursor_ > raw_data_); 109 ASSERT(buffer_cursor_ > raw_data_);
110 buffer_cursor_--; 110 buffer_cursor_--;
111 pos_--; 111 pos_--;
112 } 112 }
113 113
114 protected: 114 protected:
115 virtual unsigned SlowSeekForward(unsigned delta) { 115 virtual unsigned SlowSeekForward(unsigned delta) {
116 // Fast case always handles seeking. 116 // Fast case always handles seeking.
117 return 0; 117 return 0;
118 } 118 }
119 virtual bool ReadBlock() { 119 virtual bool ReadBlock() {
120 // Entire string is read at start. 120 // Entire string is read at start.
121 return false; 121 return false;
122 } 122 }
123 Handle<ExternalTwoByteString> source_; 123 Handle<ExternalTwoByteString> source_;
124 const uc16* raw_data_; // Pointer to the actual array of characters. 124 const uc16* raw_data_; // Pointer to the actual array of characters.
125 }; 125 };
126 126
127 } } // namespace v8::internal 127 } } // namespace v8::internal
128 128
129 #endif // V8_SCANNER_CHARACTER_STREAMS_H_ 129 #endif // V8_SCANNER_CHARACTER_STREAMS_H_
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698