9 #include "src/char-predicates-inl.h" 10 #include "src/isolate-inl.h" 11 #include "src/string-search.h" 12 #include "src/unicode-inl.h" 18 bool IsReservedPredicate(uc16 c) {
// Inspects a raw octet sequence of the given |length|. The visible part of the
// test requires exactly three octets whose first two are 0xEF 0xBF.
// NOTE(review): 0xEF 0xBF 0xBD is the UTF-8 form of U+FFFD; presumably the
// remainder of the condition compares octets[2] against 0xBD -- confirm.
37 bool IsReplacementCharacter(
const uint8_t* octets,
int length) {
// True when the length is not 3 or either of the first two octets mismatches;
// the condition continues on the following line.
40 if (length != 3 || octets[0] != 0xEF || octets[1] != 0xBF ||
// Decodes the |length| octets in |octets| with unibrow::Utf8::ValueOf and
// appends the resulting code point to |buffer| as UTF-16 code units.
// Returns bool; NOTE(review): presumably false on an invalid sequence and true
// otherwise -- the return statements are outside the visible lines.
47 bool DecodeOctets(
const uint8_t* octets,
int length,
48 std::vector<uc16>* buffer) {
// |cursor| is passed by address as ValueOf's third argument.
50 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
// kBadChar is only treated specially when the input octets do not themselves
// pass IsReplacementCharacter.
51 if (value == unibrow::Utf8::kBadChar &&
52 !IsReplacementCharacter(octets, length)) {
// Values up to kMaxNonSurrogateCharCode are appended as a single code unit.
56 if (value <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
57 buffer->push_back(value);
// Lead surrogate followed by trail surrogate for |value|.
// NOTE(review): presumably the else-arm of the test above -- confirm.
59 buffer->push_back(unibrow::Utf16::LeadSurrogate(value));
60 buffer->push_back(unibrow::Utf16::TrailSurrogate(value));
65 int TwoDigitHex(uc16 character1, uc16 character2) {
66 if (character1 >
'f')
return -1;
67 int high = HexValue(character1);
68 if (high == -1)
return -1;
69 if (character2 >
'f')
return -1;
70 int low = HexValue(character2);
71 if (low == -1)
return -1;
72 return (high << 4) + low;
// Appends the result of decoding one %XX escape to |buffer|.
//   decoded     - the value the escape decoded to.
//   uri_content - flat view of the input; |index| is used as the position of
//                 the escape's '%' (the two digits are read at index + 1 and
//                 index + 2).
//   is_uri      - when true, characters matching IsReservedPredicate are not
//                 replaced by their decoded value.
76 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content,
int index,
77 bool is_uri, std::vector<T>* buffer) {
// Reserved character while decoding a full URI: re-emit the escape as it
// appeared in the input ('%' plus the two original digit code units).
78 if (is_uri && IsReservedPredicate(decoded)) {
79 buffer->push_back(
'%');
80 uc16 first = uri_content->Get(index + 1);
81 uc16 second = uri_content->Get(index + 2);
// Debug-only checks that both digit code units are below T's maximum.
82 DCHECK_GT(std::numeric_limits<T>::max(), first);
83 DCHECK_GT(std::numeric_limits<T>::max(), second);
85 buffer->push_back(first);
86 buffer->push_back(second);
// NOTE(review): presumably the else-arm -- the decoded value itself is stored.
88 buffer->push_back(decoded);
// Walks |uri_content| from |index| up to |uri_length| and appends output to the
// uc16 |buffer|. %XX escapes are decoded with TwoDigitHex; decoded values above
// kMaxOneByteChar start a multi-octet sequence that is collected into |octets|
// and handed to DecodeOctets.
// NOTE(review): presumably returns false on any malformed escape or sequence
// and true on success -- the return statements are outside the visible lines.
92 bool IntoTwoByte(
int index,
bool is_uri,
int uri_length,
93 String::FlatContent* uri_content, std::vector<uc16>* buffer) {
94 for (
int k = index; k < uri_length; k++) {
95 uc16 code = uri_content->Get(k);
// An escape needs two more code units after position k, and both must be
// accepted by TwoDigitHex (negative result means rejected).
98 if (k + 2 >= uri_length ||
99 (two_digits = TwoDigitHex(uri_content->Get(k + 1),
100 uri_content->Get(k + 2))) < 0) {
104 uc16 decoded =
static_cast<uc16
>(two_digits);
105 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
106 uint8_t octets[unibrow::Utf8::kMaxEncodedSize];
109 int number_of_continuation_bytes = 0;
// Each pass shifts |decoded| one bit further left and tests bit 7, i.e. it
// examines bit 6, then bit 5, ... of the first octet; the counter is
// pre-incremented, so it is 1 on the first test.
110 while ((decoded << ++number_of_continuation_bytes) & 0x80) {
// Guards: more than three such bits, or not enough input left for another
// escape.
111 if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) {
// The next code unit (k is advanced here) must be '%' followed by two units
// accepted by TwoDigitHex.
114 if (uri_content->Get(++k) !=
'%' ||
115 (two_digits = TwoDigitHex(uri_content->Get(k + 1),
116 uri_content->Get(k + 2))) < 0) {
120 uc16 continuation_byte =
static_cast<uc16
>(two_digits);
// Stored at the slot indexed by the current counter value.
121 octets[number_of_continuation_bytes] = continuation_byte;
// Decode the collected octets; DecodeOctets appends to |buffer| itself.
124 if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {
// k - 2 is passed as the position AddToBuffer reads the escape digits from
// (it reads index + 1 and index + 2).
128 AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);
// The original code unit is appended unchanged.
131 buffer->push_back(code);
// Scans |uri| from the start, appending to |one_byte_buffer| for as long as
// every produced value is at most kMaxOneByteChar. On the first value above
// that limit (decoded from an escape, or a raw code unit) the remainder of the
// work is delegated to IntoTwoByte starting at the current position, and its
// result is returned.
137 bool IntoOneAndTwoByte(Handle<String> uri,
bool is_uri,
138 std::vector<uint8_t>* one_byte_buffer,
139 std::vector<uc16>* two_byte_buffer) {
// Heap allocation is disallowed while |uri_content| is in use.
140 DisallowHeapAllocation no_gc;
141 String::FlatContent uri_content = uri->GetFlatContent();
143 int uri_length = uri->length();
144 for (
int k = 0; k < uri_length; k++) {
145 uc16 code = uri_content.Get(k);
// An escape needs two more code units after position k, both accepted by
// TwoDigitHex.
148 if (k + 2 >= uri_length ||
149 (two_digits = TwoDigitHex(uri_content.Get(k + 1),
150 uri_content.Get(k + 2))) < 0) {
154 uc16 decoded =
static_cast<uc16
>(two_digits);
// Decoded value does not fit the one-byte path: hand over at position k.
155 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
156 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
// Here k is the position of the escape's '%'.
160 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
// Raw code unit does not fit the one-byte path: hand over at position k.
163 if (code > unibrow::Utf8::kMaxOneByteChar) {
164 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
167 one_byte_buffer->push_back(code);
// Decodes |uri| into a new string.
// The input is flattened, then split by IntoOneAndTwoByte into a one-byte
// prefix and a two-byte remainder. A false result from IntoOneAndTwoByte
// raises a URIError via THROW_NEW_ERROR.
175 MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,
177 uri = String::Flatten(isolate, uri);
178 std::vector<uint8_t> one_byte_buffer;
179 std::vector<uc16> two_byte_buffer;
181 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
182 THROW_NEW_ERROR(isolate, NewURIError(), String);
// Nothing landed in the two-byte buffer: the result is a one-byte string built
// directly from |one_byte_buffer|.
185 if (two_byte_buffer.empty()) {
186 return isolate->factory()->NewStringFromOneByte(Vector<const uint8_t>(
187 one_byte_buffer.data(),
static_cast<int>(one_byte_buffer.size())));
// Otherwise allocate a raw two-byte string sized for both buffers combined.
190 Handle<SeqTwoByteString> result;
192 static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size());
193 ASSIGN_RETURN_ON_EXCEPTION(
194 isolate, result, isolate->factory()->NewRawTwoByteString(result_length),
// Fill the result: one-byte prefix first, then the two-byte part immediately
// after it.
197 DisallowHeapAllocation no_gc;
198 CopyChars(result->GetChars(), one_byte_buffer.data(), one_byte_buffer.size());
199 CopyChars(result->GetChars() + one_byte_buffer.size(), two_byte_buffer.data(),
200 two_byte_buffer.size());
// Predicate over a UTF-16 code unit. Uri::Encode appends a code unit for which
// this holds to its output buffer as-is.
206 bool IsUnescapePredicateInUriComponent(uc16 c) {
// First test: alphanumeric code units.
207 if (IsAlphaNumeric(c)) {
// Predicate over a UTF-16 code unit. Uri::Encode consults it only when its
// is_uri flag is set.
227 bool IsUriSeparator(uc16 c) {
246 void AddEncodedOctetToBuffer(uint8_t octet, std::vector<uint8_t>* buffer) {
247 buffer->push_back(
'%');
248 buffer->push_back(HexCharOfValue(octet >> 4));
249 buffer->push_back(HexCharOfValue(octet & 0x0F));
// Encodes the single code unit |c| with unibrow::Utf8::Encode (no previous
// character) and appends each produced octet to |buffer| in escaped form.
252 void EncodeSingle(uc16 c, std::vector<uint8_t>* buffer) {
// NOTE(review): presumably this call's result is what |number_of_bytes| holds
// and |s| is the scratch array it fills -- confirm against the declarations.
256 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter,
false);
// One escape per octet written into |s|.
257 for (
int k = 0; k < number_of_bytes; k++) {
258 AddEncodedOctetToBuffer(s[k], buffer);
// Encodes the surrogate pair (|cc1|, |cc2|): the two units are combined with
// unibrow::Utf16::CombineSurrogatePair, the result is encoded with
// unibrow::Utf8::Encode into |s|, and each produced octet is appended to
// |buffer| in escaped form.
262 void EncodePair(uc16 cc1, uc16 cc2, std::vector<uint8_t>* buffer) {
264 int number_of_bytes =
265 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
266 unibrow::Utf16::kNoPreviousCharacter,
false);
// One escape per octet written into |s|.
267 for (
int k = 0; k < number_of_bytes; k++) {
268 AddEncodedOctetToBuffer(s[k], buffer);
// Percent-encodes |uri| into a new one-byte string.
// The input is flattened and scanned one code unit at a time. Lead surrogates
// are paired with a following trail surrogate and encoded via EncodePair.
// Other non-surrogate units are either copied through (when
// IsUnescapePredicateInUriComponent holds, or is_uri is set and IsUriSeparator
// holds) or encoded via EncodeSingle. A URIError is raised via THROW_NEW_ERROR
// on the error path.
274 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
276 uri = String::Flatten(isolate, uri);
277 int uri_length = uri->length();
278 std::vector<uint8_t> buffer;
// The output is at least as long as the input when nothing is rejected, so
// reserve that much up front.
279 buffer.reserve(uri_length);
// Heap allocation is disallowed while |uri_content| is in use.
282 DisallowHeapAllocation no_gc;
283 String::FlatContent uri_content = uri->GetFlatContent();
285 for (
int k = 0; k < uri_length; k++) {
286 uc16 cc1 = uri_content.Get(k);
287 if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
// NOTE(review): presumably k has been advanced to the following unit before
// this bounds test -- confirm.
289 if (k < uri_length) {
// NOTE(review): this read goes through uri->Get(k) while every other read in
// this loop uses uri_content.Get(...) -- confirm whether that is intentional.
290 uc16 cc2 = uri->Get(k);
291 if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
292 EncodePair(cc1, cc2, &buffer);
296 }
else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {
// Units allowed to appear unescaped are copied through.
297 if (IsUnescapePredicateInUriComponent(cc1) ||
298 (is_uri && IsUriSeparator(cc1))) {
299 buffer.push_back(cc1);
301 EncodeSingle(cc1, &buffer);
// Error path: allocation is re-enabled in this scope so the URIError can be
// created.
306 AllowHeapAllocation allocate_error_and_return;
307 THROW_NEW_ERROR(isolate, NewURIError(), String);
311 return isolate->factory()->NewStringFromOneByte(VectorOf(buffer));
// Reads one (possibly escaped) character from |vector| at position |i|.
// Two escape forms are recognised:
//   %uXXXX - needs six units starting at i; yields (hi << 8) + lo, where hi and
//            lo are the TwoDigitHex values of the two digit pairs.
//   %XX    - needs three units starting at i; lo is the TwoDigitHex value of
//            the digit pair.
// NOTE(review): |step| is presumably set to the number of input units consumed
// -- the writes to it are outside the visible lines.
316 template <
typename Char>
317 int UnescapeChar(Vector<const Char> vector,
int i,
int length,
int* step) {
318 uint16_t character = vector[
i];
// %uXXXX: bounds check first, then both digit pairs must be accepted by
// TwoDigitHex (result greater than -1).
321 if (character ==
'%' &&
i <= length - 6 && vector[
i + 1] ==
'u' &&
322 (hi = TwoDigitHex(vector[
i + 2], vector[
i + 3])) > -1 &&
323 (lo = TwoDigitHex(vector[
i + 4], vector[
i + 5])) > -1) {
325 return (hi << 8) + lo;
326 }
// %XX: bounds check, then the digit pair must be accepted by TwoDigitHex.
else if (character ==
'%' &&
i <= length - 3 &&
327 (lo = TwoDigitHex(vector[
i + 1], vector[
i + 2])) > -1) {
// Unescapes |string| from |start_index| onwards and returns the cons of the
// untouched prefix [0, start_index) with the newly built unescaped remainder.
// Works in two passes over the same range: the first counts the output length
// and checks whether any unescaped value exceeds String::kMaxOneByteCharCode,
// the second writes into a freshly allocated one-byte or two-byte string.
336 template <
typename Char>
337 MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String>
string,
339 bool one_byte =
true;
340 int length =
string->length();
342 int unescaped_length = 0;
// Pass 1: measure. The loop header only bumps the output counter.
// NOTE(review): |i| is presumably advanced in the body by the step reported
// from UnescapeChar -- confirm.
344 DisallowHeapAllocation no_allocation;
345 Vector<const Char> vector =
string->GetCharVector<Char>();
346 for (
int i = start_index;
i < length; unescaped_length++) {
// A value above kMaxOneByteCharCode means the result cannot be one-byte.
348 if (UnescapeChar(vector,
i, length, &step) >
349 String::kMaxOneByteCharCode) {
356 DCHECK(start_index < length);
// The part before the first escape is reused as a substring.
357 Handle<String> first_part =
358 isolate->factory()->NewProperSubString(
string, 0, start_index);
360 int dest_position = 0;
361 Handle<String> second_part;
362 DCHECK_LE(unescaped_length, String::kMaxLength);
// Pass 2, one-byte destination.
364 Handle<SeqOneByteString> dest = isolate->factory()
365 ->NewRawOneByteString(unescaped_length)
367 DisallowHeapAllocation no_allocation;
368 Vector<const Char> vector =
string->GetCharVector<Char>();
369 for (
int i = start_index;
i < length; dest_position++) {
371 dest->SeqOneByteStringSet(dest_position,
372 UnescapeChar(vector,
i, length, &step));
// Pass 2, two-byte destination.
377 Handle<SeqTwoByteString> dest = isolate->factory()
378 ->NewRawTwoByteString(unescaped_length)
380 DisallowHeapAllocation no_allocation;
381 Vector<const Char> vector =
string->GetCharVector<Char>();
382 for (
int i = start_index;
i < length; dest_position++) {
384 dest->SeqTwoByteStringSet(dest_position,
385 UnescapeChar(vector,
i, length, &step));
390 return isolate->factory()->NewConsString(first_part, second_part);
// Predicate over a code unit. EscapePrivate writes a code unit for which this
// holds to the destination unchanged.
393 bool IsNotEscaped(uint16_t c) {
// First test: alphanumeric code units.
394 if (IsAlphaNumeric(c)) {
// Fast-path wrapper around UnescapeSlow: searches |source| for the first '%'.
// If there is none, |source| is returned unchanged; otherwise UnescapeSlow is
// invoked with the position of that first '%'.
412 template <
typename Char>
413 static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
414 Handle<String> source) {
// Heap allocation is disallowed while the character vector is searched.
417 DisallowHeapAllocation no_allocation;
418 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR(
"%"));
419 index = search.Search(source->GetCharVector<Char>(), 0);
// A negative index means no '%' was found, so there is nothing to unescape.
420 if (index < 0)
return source;
422 return UnescapeSlow<Char>(isolate, source, index);
// Escapes the flat string |string| into a new one-byte string.
// Works in two passes: the first computes |escaped_length|, the second writes
// the output. Each code unit is emitted in one of three shapes:
//   %uXXXX (6 characters, four hex digits of the unit),
//   the unit itself (when IsNotEscaped holds), or
//   %XX    (3 characters, two hex digits of the unit).
// If the computed length equals the input length, the input is returned as-is.
425 template <
typename Char>
426 static MaybeHandle<String> EscapePrivate(Isolate* isolate,
427 Handle<String>
string) {
428 DCHECK(string->IsFlat());
429 int escaped_length = 0;
430 int length =
string->length();
// Pass 1: measure.
433 DisallowHeapAllocation no_allocation;
434 Vector<const Char> vector =
string->GetCharVector<Char>();
435 for (
int i = 0;
i < length;
i++) {
436 uint16_t c = vector[
i];
439 }
else if (IsNotEscaped(c)) {
// The debug check states that kMaxLength leaves headroom of at least 6 below
// 0x7FFFFFFF, which is the largest per-unit growth (the %uXXXX form). The
// scan stops as soon as the running length exceeds kMaxLength.
446 DCHECK_LT(String::kMaxLength, 0x7FFFFFFF - 6);
447 if (escaped_length > String::kMaxLength)
break;
// Same length in and out: return the original string.
452 if (escaped_length == length)
return string;
454 Handle<SeqOneByteString> dest;
455 ASSIGN_RETURN_ON_EXCEPTION(
456 isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
458 int dest_position = 0;
// Pass 2: write.
461 DisallowHeapAllocation no_allocation;
462 Vector<const Char> vector =
string->GetCharVector<Char>();
463 for (
int i = 0;
i < length;
i++) {
464 uint16_t c = vector[
i];
// %uXXXX form: the four nibbles of |c|, most significant first.
466 dest->SeqOneByteStringSet(dest_position,
'%');
467 dest->SeqOneByteStringSet(dest_position + 1,
'u');
468 dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));
469 dest->SeqOneByteStringSet(dest_position + 3,
470 HexCharOfValue((c >> 8) & 0xF));
471 dest->SeqOneByteStringSet(dest_position + 4,
472 HexCharOfValue((c >> 4) & 0xF));
473 dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xF));
475 }
// Code units that need no escaping are copied through.
else if (IsNotEscaped(c)) {
476 dest->SeqOneByteStringSet(dest_position, c);
// %XX form: high nibble, then low nibble.
479 dest->SeqOneByteStringSet(dest_position,
'%');
480 dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));
481 dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xF));
492 MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String>
string) {
493 Handle<String> result;
494 string = String::Flatten(isolate,
string);
495 return string->IsOneByteRepresentationUnderneath()
496 ? EscapePrivate<uint8_t>(isolate, string)
497 : EscapePrivate<uc16>(isolate,
string);
500 MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String>
string) {
501 Handle<String> result;
502 string = String::Flatten(isolate,
string);
503 return string->IsOneByteRepresentationUnderneath()
504 ? UnescapePrivate<uint8_t>(isolate, string)
505 : UnescapePrivate<uc16>(isolate,
string);