Index: src/uri.h |
diff --git a/src/uri.h b/src/uri.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..c7a6301f1210ac231f84dd7af6406285ace52c8b |
--- /dev/null |
+++ b/src/uri.h |
@@ -0,0 +1,309 @@ |
+// Copyright 2013 the V8 project authors. All rights reserved. |
+// Redistribution and use in source and binary forms, with or without |
+// modification, are permitted provided that the following conditions are |
+// met: |
+// |
+// * Redistributions of source code must retain the above copyright |
+// notice, this list of conditions and the following disclaimer. |
+// * Redistributions in binary form must reproduce the above |
+// copyright notice, this list of conditions and the following |
+// disclaimer in the documentation and/or other materials provided |
+// with the distribution. |
+// * Neither the name of Google Inc. nor the names of its |
+// contributors may be used to endorse or promote products derived |
+// from this software without specific prior written permission. |
+// |
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ |
+#ifndef V8_URI_H_ |
+#define V8_URI_H_ |
+ |
+#include "v8.h" |
+ |
+#include "string-search.h" |
+#include "v8utils.h" |
+#include "v8conversions.h" |
+ |
+namespace v8 { |
+namespace internal { |
+ |
+ |
+// Returns the flat character data of |string| as a vector of Char. Only the |
+// explicit specializations for uint8_t and uc16 that follow are defined. |
+template <typename Char> |
+static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); |
+ |
+ |
+// One-byte specialization: asserts that the flat content of |string| is |
+// ASCII and exposes it as a vector of uint8_t. |
+template <> |
+Vector<const uint8_t> GetCharVector(Handle<String> string) { |
+  String::FlatContent content = string->GetFlatContent(); |
+  ASSERT(content.IsAscii()); |
+  return content.ToOneByteVector(); |
+} |
+ |
+ |
+// Two-byte specialization: asserts that the flat content of |string| is |
+// two-byte and exposes it as a vector of uc16. |
+template <> |
+Vector<const uc16> GetCharVector(Handle<String> string) { |
+  String::FlatContent content = string->GetFlatContent(); |
+  ASSERT(content.IsTwoByte()); |
+  return content.ToUC16Vector(); |
+} |
+ |
+ |
+// Static helpers that decode "%XX" and "%uXXXX" escape sequences in a string. |
+class URIUnescape : public AllStatic { |
+ public: |
+ // Returns |source| itself when it contains no '%' character; otherwise |
+ // returns the result of UnescapeSlow starting at the first '%'. |
+ template<typename Char> |
+ static Handle<String> Unescape(Isolate* isolate, Handle<String> source); |
+ |
+ private: |
+ // Hex digit value for each character code from 0 through 'f' (so the array |
+ // has 'g' entries); -1 marks characters that are not hex digits. |
+ static const signed char kHexValue['g']; |
+ |
+ // Decodes everything from |start_index| to the end of |string| and joins it |
+ // with the untouched prefix that precedes |start_index|. |
+ template<typename Char> |
+ static Handle<String> UnescapeSlow( |
+ Isolate* isolate, Handle<String> string, int start_index); |
+ |
+ // Returns the value of the two hex digits, or -1 if either character is not |
+ // a hex digit. |
+ static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); |
+ |
+ // Decodes the character at position |i| of |vector|. Returns the decoded |
+ // character code and stores the number of input characters consumed (6, 3 |
+ // or 1) in |*step|. |
+ template <typename Char> |
+ static INLINE(int UnescapeChar(Vector<const Char> vector, |
+ int i, |
+ int length, |
+ int* step)); |
+}; |
+ |
+ |
+// Indexed by character code, 16 entries per row, ending at 'f'. The rows for |
+// '0'-'9', 'A'-'F' and 'a'-'f' hold the digit values 0-15; every other entry |
+// is -1. The "-0" entry for '0' is simply 0. |
+const signed char URIUnescape::kHexValue[] = { |
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
+ -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, |
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
+ -1, 10, 11, 12, 13, 14, 15 }; |
+ |
+ |
+// Fast path for unescaping: looks for the first '%' in |source|. Without one |
+// there is nothing to decode and |source| is returned as is; otherwise the |
+// work is handed to UnescapeSlow, starting at that position. |
+template<typename Char> |
+Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) { |
+  int first_percent = -1; |
+  { |
+    // The character vector is only valid while no allocation can happen. |
+    AssertNoAllocation no_allocation; |
+    StringSearch<uint8_t, Char> percent_search( |
+        isolate, STATIC_ASCII_VECTOR("%")); |
+    Vector<const Char> chars = GetCharVector<Char>(source); |
+    first_percent = percent_search.Search(chars, 0); |
+  } |
+  if (first_percent < 0) return source; |
+  return UnescapeSlow<Char>(isolate, source, first_percent); |
+} |
+ |
+ |
+// Builds the unescaped form of |string| from |start_index| onwards. The |
+// characters before |start_index| are reused through a substring; the rest |
+// is decoded into a newly allocated sequential string, one-byte if every |
+// decoded character fits and two-byte otherwise. Both parts are then joined |
+// with a cons string. |
+template <typename Char> |
+Handle<String> URIUnescape::UnescapeSlow( |
+    Isolate* isolate, Handle<String> string, int start_index) { |
+  const int length = string->length(); |
+ |
+  // Pass 1: measure the decoded tail and check whether it fits in one byte |
+  // per character. |
+  bool fits_one_byte = true; |
+  int unescaped_length = 0; |
+  { |
+    AssertNoAllocation no_allocation; |
+    Vector<const Char> chars = GetCharVector<Char>(string); |
+    int pos = start_index; |
+    while (pos < length) { |
+      int consumed; |
+      const int decoded = UnescapeChar(chars, pos, length, &consumed); |
+      if (decoded > String::kMaxOneByteCharCode) fits_one_byte = false; |
+      pos += consumed; |
+      ++unescaped_length; |
+    } |
+  } |
+ |
+  ASSERT(start_index < length); |
+  Handle<String> prefix = |
+      isolate->factory()->NewProperSubString(string, 0, start_index); |
+ |
+  // Pass 2: decode the tail into the destination. The character vector is |
+  // fetched again only after the destination has been allocated. |
+  Handle<String> suffix; |
+  if (fits_one_byte) { |
+    Handle<SeqOneByteString> dest = |
+        isolate->factory()->NewRawOneByteString(unescaped_length); |
+    AssertNoAllocation no_allocation; |
+    Vector<const Char> chars = GetCharVector<Char>(string); |
+    int pos = start_index; |
+    for (int out = 0; pos < length; ++out) { |
+      int consumed; |
+      const int decoded = UnescapeChar(chars, pos, length, &consumed); |
+      dest->SeqOneByteStringSet(out, decoded); |
+      pos += consumed; |
+    } |
+    suffix = dest; |
+  } else { |
+    Handle<SeqTwoByteString> dest = |
+        isolate->factory()->NewRawTwoByteString(unescaped_length); |
+    AssertNoAllocation no_allocation; |
+    Vector<const Char> chars = GetCharVector<Char>(string); |
+    int pos = start_index; |
+    for (int out = 0; pos < length; ++out) { |
+      int consumed; |
+      const int decoded = UnescapeChar(chars, pos, length, &consumed); |
+      dest->SeqTwoByteStringSet(out, decoded); |
+      pos += consumed; |
+    } |
+    suffix = dest; |
+  } |
+  return isolate->factory()->NewConsString(prefix, suffix); |
+} |
+ |
+ |
+// Combines two hex digit characters into the value they encode (0-255). |
+// Returns -1 if either character is not a hex digit. |
+int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { |
+  // kHexValue ends at 'f', so larger codes must be rejected before indexing. |
+  if (character1 > 'f' || character2 > 'f') return -1; |
+  const int high_nibble = kHexValue[character1]; |
+  const int low_nibble = kHexValue[character2]; |
+  if (high_nibble == -1 || low_nibble == -1) return -1; |
+  return (high_nibble << 4) | low_nibble; |
+} |
+ |
+ |
+// Decodes the character that starts at index |i| of |vector|, where |length| |
+// bounds the readable range. Recognizes "%uXXXX" (6 characters) and "%XX" |
+// (3 characters); anything else, including a malformed or truncated escape, |
+// is taken literally as a single character. The number of characters |
+// consumed is written to |*step| and the decoded code is returned. |
+template <typename Char> |
+int URIUnescape::UnescapeChar(Vector<const Char> vector, |
+                              int i, |
+                              int length, |
+                              int* step) { |
+  const uint16_t current = vector[i]; |
+  if (current == '%') { |
+    // Try the four-digit form first. |
+    if (i <= length - 6 && vector[i + 1] == 'u') { |
+      const int32_t upper = TwoDigitHex(vector[i + 2], vector[i + 3]); |
+      if (upper != -1) { |
+        const int32_t lower = TwoDigitHex(vector[i + 4], vector[i + 5]); |
+        if (lower != -1) { |
+          *step = 6; |
+          return (upper << 8) + lower; |
+        } |
+      } |
+    } |
+    // Fall back to the two-digit form. |
+    if (i <= length - 3) { |
+      const int32_t value = TwoDigitHex(vector[i + 1], vector[i + 2]); |
+      if (value != -1) { |
+        *step = 3; |
+        return value; |
+      } |
+    } |
+  } |
+  // Not a valid escape sequence: pass the character through unchanged. |
+  *step = 1; |
+  return current; |
+} |
+ |
+ |
+// Static helpers that replace characters with "%XX" / "%uXXXX" escape |
+// sequences. |
+class URIEscape : public AllStatic { |
+ public: |
+  // Returns |string| with every character that is not in the kNotEscaped set |
+  // replaced by an escape sequence. |
+  template<typename Char> |
+  static Handle<String> Escape(Isolate* isolate, Handle<String> string); |
+ |
+ private: |
+  // Upper-case hex digits, indexed by nibble value. |
+  static const char kHexChars[17]; |
+  // Non-zero for each character code below 256 that is left unescaped. |
+  static const char kNotEscaped[256]; |
+ |
+  // Returns true if |c| is left unescaped. |c| must be below 256: the table |
+  // has no entries beyond that, so larger codes would read out of bounds. |
+  static bool IsNotEscaped(uint16_t c) { |
+    ASSERT(c < 256); |
+    return kNotEscaped[c] != 0; |
+  } |
+}; |
+ |
+ |
+// Upper-case hex digits indexed by nibble value (plus the terminating NUL). |
+const char URIEscape::kHexChars[] = "0123456789ABCDEF"; |
+ |
+ |
+// kNotEscaped[c] is 1 when the character with code c is copied through |
+// unescaped (letters, digits and @*_+./-) and 0 when it has to be escaped. |
+// The table has 16 entries per row and is generated by the following: |
+// |
+// #!/bin/perl |
+// for (my $i = 0; $i < 256; $i++) { |
+// print "\n" if $i % 16 == 0; |
+// my $c = chr($i); |
+// my $escaped = 1; |
+// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; |
+// print $escaped ? "0, " : "1, "; |
+// } |
+ |
+const char URIEscape::kNotEscaped[] = { |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; |
+ |
+ |
+// Returns |string| with every character outside the kNotEscaped set replaced |
+// by an escape sequence: "%XX" for character codes below 256 and "%uXXXX" |
+// otherwise, using the digits in kHexChars. |string| must be flat. If nothing |
+// needs escaping, |string| itself is returned. If the result would be longer |
+// than String::kMaxLength, the context is marked out of memory and a null |
+// handle is returned. |
+template<typename Char> |
+Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { |
+  ASSERT(string->IsFlat()); |
+  const int length = string->length(); |
+  int escaped_length = 0; |
+ |
+  // Pass 1: compute the length of the escaped string. |
+  { |
+    AssertNoAllocation no_allocation; |
+    Vector<const Char> chars = GetCharVector<Char>(string); |
+    for (int pos = 0; pos < length; ++pos) { |
+      const uint16_t code = chars[pos]; |
+      int width; |
+      if (code >= 256) { |
+        width = 6;  // "%uXXXX" |
+      } else if (IsNotEscaped(code)) { |
+        width = 1;  // Copied through unchanged. |
+      } else { |
+        width = 3;  // "%XX" |
+      } |
+      escaped_length += width; |
+ |
+      // We don't allow strings that are longer than a maximal length. |
+      ASSERT(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow. |
+      if (escaped_length > String::kMaxLength) { |
+        isolate->context()->mark_out_of_memory(); |
+        return Handle<String>::null(); |
+      } |
+    } |
+  } |
+ |
+  // No length change implies no change. Return original string if no change. |
+  if (escaped_length == length) return string; |
+ |
+  Handle<SeqOneByteString> dest = |
+      isolate->factory()->NewRawOneByteString(escaped_length); |
+ |
+  // Pass 2: write the escaped characters. The character vector is fetched |
+  // again because the allocation above happened in between. |
+  { |
+    AssertNoAllocation no_allocation; |
+    Vector<const Char> chars = GetCharVector<Char>(string); |
+    int out = 0; |
+    for (int pos = 0; pos < length; ++pos) { |
+      const uint16_t code = chars[pos]; |
+      if (code < 256 && IsNotEscaped(code)) { |
+        dest->SeqOneByteStringSet(out++, code); |
+        continue; |
+      } |
+      dest->SeqOneByteStringSet(out++, '%'); |
+      if (code >= 256) { |
+        // The four-digit form adds a 'u' marker and the two upper nibbles. |
+        dest->SeqOneByteStringSet(out++, 'u'); |
+        dest->SeqOneByteStringSet(out++, kHexChars[code >> 12]); |
+        dest->SeqOneByteStringSet(out++, kHexChars[(code >> 8) & 0xf]); |
+      } |
+      dest->SeqOneByteStringSet(out++, kHexChars[(code >> 4) & 0xf]); |
+      dest->SeqOneByteStringSet(out++, kHexChars[code & 0xf]); |
+    } |
+  } |
+ |
+  return dest; |
+} |
+ |
+} } // namespace v8::internal |
+ |
+#endif // V8_URI_H_ |