V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
uri.cc
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/uri.h"
6 
7 #include <vector>
8 
9 #include "src/char-predicates-inl.h"
10 #include "src/isolate-inl.h"
11 #include "src/string-search.h"
12 #include "src/unicode-inl.h"
13 
14 namespace v8 {
15 namespace internal {
16 
17 namespace { // anonymous namespace for DecodeURI helper functions
18 bool IsReservedPredicate(uc16 c) {
19  switch (c) {
20  case '#':
21  case '$':
22  case '&':
23  case '+':
24  case ',':
25  case '/':
26  case ':':
27  case ';':
28  case '=':
29  case '?':
30  case '@':
31  return true;
32  default:
33  return false;
34  }
35 }
36 
// Returns true iff |octets| holds exactly the three-byte UTF-8 encoding
// (0xEF 0xBF 0xBD) of the replacement character U+FFFD from the Unicode
// Specials table.
//
// |octets| - bytes to inspect; only read when |length| is 3.
// |length| - number of valid bytes in |octets|. It is tested first so that
//            shorter inputs are never indexed.
bool IsReplacementCharacter(const uint8_t* octets, int length) {
  return length == 3 && octets[0] == 0xEF && octets[1] == 0xBF &&
         octets[2] == 0xBD;
}
46 
47 bool DecodeOctets(const uint8_t* octets, int length,
48  std::vector<uc16>* buffer) {
49  size_t cursor = 0;
50  uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
51  if (value == unibrow::Utf8::kBadChar &&
52  !IsReplacementCharacter(octets, length)) {
53  return false;
54  }
55 
56  if (value <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
57  buffer->push_back(value);
58  } else {
59  buffer->push_back(unibrow::Utf16::LeadSurrogate(value));
60  buffer->push_back(unibrow::Utf16::TrailSurrogate(value));
61  }
62  return true;
63 }
64 
65 int TwoDigitHex(uc16 character1, uc16 character2) {
66  if (character1 > 'f') return -1;
67  int high = HexValue(character1);
68  if (high == -1) return -1;
69  if (character2 > 'f') return -1;
70  int low = HexValue(character2);
71  if (low == -1) return -1;
72  return (high << 4) + low;
73 }
74 
75 template <typename T>
76 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,
77  bool is_uri, std::vector<T>* buffer) {
78  if (is_uri && IsReservedPredicate(decoded)) {
79  buffer->push_back('%');
80  uc16 first = uri_content->Get(index + 1);
81  uc16 second = uri_content->Get(index + 2);
82  DCHECK_GT(std::numeric_limits<T>::max(), first);
83  DCHECK_GT(std::numeric_limits<T>::max(), second);
84 
85  buffer->push_back(first);
86  buffer->push_back(second);
87  } else {
88  buffer->push_back(decoded);
89  }
90 }
91 
92 bool IntoTwoByte(int index, bool is_uri, int uri_length,
93  String::FlatContent* uri_content, std::vector<uc16>* buffer) {
94  for (int k = index; k < uri_length; k++) {
95  uc16 code = uri_content->Get(k);
96  if (code == '%') {
97  int two_digits;
98  if (k + 2 >= uri_length ||
99  (two_digits = TwoDigitHex(uri_content->Get(k + 1),
100  uri_content->Get(k + 2))) < 0) {
101  return false;
102  }
103  k += 2;
104  uc16 decoded = static_cast<uc16>(two_digits);
105  if (decoded > unibrow::Utf8::kMaxOneByteChar) {
106  uint8_t octets[unibrow::Utf8::kMaxEncodedSize];
107  octets[0] = decoded;
108 
109  int number_of_continuation_bytes = 0;
110  while ((decoded << ++number_of_continuation_bytes) & 0x80) {
111  if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) {
112  return false;
113  }
114  if (uri_content->Get(++k) != '%' ||
115  (two_digits = TwoDigitHex(uri_content->Get(k + 1),
116  uri_content->Get(k + 2))) < 0) {
117  return false;
118  }
119  k += 2;
120  uc16 continuation_byte = static_cast<uc16>(two_digits);
121  octets[number_of_continuation_bytes] = continuation_byte;
122  }
123 
124  if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {
125  return false;
126  }
127  } else {
128  AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);
129  }
130  } else {
131  buffer->push_back(code);
132  }
133  }
134  return true;
135 }
136 
137 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
138  std::vector<uint8_t>* one_byte_buffer,
139  std::vector<uc16>* two_byte_buffer) {
140  DisallowHeapAllocation no_gc;
141  String::FlatContent uri_content = uri->GetFlatContent();
142 
143  int uri_length = uri->length();
144  for (int k = 0; k < uri_length; k++) {
145  uc16 code = uri_content.Get(k);
146  if (code == '%') {
147  int two_digits;
148  if (k + 2 >= uri_length ||
149  (two_digits = TwoDigitHex(uri_content.Get(k + 1),
150  uri_content.Get(k + 2))) < 0) {
151  return false;
152  }
153 
154  uc16 decoded = static_cast<uc16>(two_digits);
155  if (decoded > unibrow::Utf8::kMaxOneByteChar) {
156  return IntoTwoByte(k, is_uri, uri_length, &uri_content,
157  two_byte_buffer);
158  }
159 
160  AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
161  k += 2;
162  } else {
163  if (code > unibrow::Utf8::kMaxOneByteChar) {
164  return IntoTwoByte(k, is_uri, uri_length, &uri_content,
165  two_byte_buffer);
166  }
167  one_byte_buffer->push_back(code);
168  }
169  }
170  return true;
171 }
172 
173 } // anonymous namespace
174 
175 MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,
176  bool is_uri) {
177  uri = String::Flatten(isolate, uri);
178  std::vector<uint8_t> one_byte_buffer;
179  std::vector<uc16> two_byte_buffer;
180 
181  if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
182  THROW_NEW_ERROR(isolate, NewURIError(), String);
183  }
184 
185  if (two_byte_buffer.empty()) {
186  return isolate->factory()->NewStringFromOneByte(Vector<const uint8_t>(
187  one_byte_buffer.data(), static_cast<int>(one_byte_buffer.size())));
188  }
189 
190  Handle<SeqTwoByteString> result;
191  int result_length =
192  static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size());
193  ASSIGN_RETURN_ON_EXCEPTION(
194  isolate, result, isolate->factory()->NewRawTwoByteString(result_length),
195  String);
196 
197  DisallowHeapAllocation no_gc;
198  CopyChars(result->GetChars(), one_byte_buffer.data(), one_byte_buffer.size());
199  CopyChars(result->GetChars() + one_byte_buffer.size(), two_byte_buffer.data(),
200  two_byte_buffer.size());
201 
202  return result;
203 }
204 
205 namespace { // anonymous namespace for EncodeURI helper functions
206 bool IsUnescapePredicateInUriComponent(uc16 c) {
207  if (IsAlphaNumeric(c)) {
208  return true;
209  }
210 
211  switch (c) {
212  case '!':
213  case '\'':
214  case '(':
215  case ')':
216  case '*':
217  case '-':
218  case '.':
219  case '_':
220  case '~':
221  return true;
222  default:
223  return false;
224  }
225 }
226 
227 bool IsUriSeparator(uc16 c) {
228  switch (c) {
229  case '#':
230  case ':':
231  case ';':
232  case '/':
233  case '?':
234  case '$':
235  case '&':
236  case '+':
237  case ',':
238  case '@':
239  case '=':
240  return true;
241  default:
242  return false;
243  }
244 }
245 
246 void AddEncodedOctetToBuffer(uint8_t octet, std::vector<uint8_t>* buffer) {
247  buffer->push_back('%');
248  buffer->push_back(HexCharOfValue(octet >> 4));
249  buffer->push_back(HexCharOfValue(octet & 0x0F));
250 }
251 
252 void EncodeSingle(uc16 c, std::vector<uint8_t>* buffer) {
253  char s[4] = {};
254  int number_of_bytes;
255  number_of_bytes =
256  unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
257  for (int k = 0; k < number_of_bytes; k++) {
258  AddEncodedOctetToBuffer(s[k], buffer);
259  }
260 }
261 
262 void EncodePair(uc16 cc1, uc16 cc2, std::vector<uint8_t>* buffer) {
263  char s[4] = {};
264  int number_of_bytes =
265  unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
266  unibrow::Utf16::kNoPreviousCharacter, false);
267  for (int k = 0; k < number_of_bytes; k++) {
268  AddEncodedOctetToBuffer(s[k], buffer);
269  }
270 }
271 
272 } // anonymous namespace
273 
274 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
275  bool is_uri) {
276  uri = String::Flatten(isolate, uri);
277  int uri_length = uri->length();
278  std::vector<uint8_t> buffer;
279  buffer.reserve(uri_length);
280 
281  {
282  DisallowHeapAllocation no_gc;
283  String::FlatContent uri_content = uri->GetFlatContent();
284 
285  for (int k = 0; k < uri_length; k++) {
286  uc16 cc1 = uri_content.Get(k);
287  if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
288  k++;
289  if (k < uri_length) {
290  uc16 cc2 = uri->Get(k);
291  if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
292  EncodePair(cc1, cc2, &buffer);
293  continue;
294  }
295  }
296  } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {
297  if (IsUnescapePredicateInUriComponent(cc1) ||
298  (is_uri && IsUriSeparator(cc1))) {
299  buffer.push_back(cc1);
300  } else {
301  EncodeSingle(cc1, &buffer);
302  }
303  continue;
304  }
305 
306  AllowHeapAllocation allocate_error_and_return;
307  THROW_NEW_ERROR(isolate, NewURIError(), String);
308  }
309  }
310 
311  return isolate->factory()->NewStringFromOneByte(VectorOf(buffer));
312 }
313 
314 namespace { // Anonymous namespace for Escape and Unescape
315 
316 template <typename Char>
317 int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {
318  uint16_t character = vector[i];
319  int32_t hi = 0;
320  int32_t lo = 0;
321  if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
322  (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 &&
323  (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) {
324  *step = 6;
325  return (hi << 8) + lo;
326  } else if (character == '%' && i <= length - 3 &&
327  (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) {
328  *step = 3;
329  return lo;
330  } else {
331  *step = 1;
332  return character;
333  }
334 }
335 
336 template <typename Char>
337 MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,
338  int start_index) {
339  bool one_byte = true;
340  int length = string->length();
341 
342  int unescaped_length = 0;
343  {
344  DisallowHeapAllocation no_allocation;
345  Vector<const Char> vector = string->GetCharVector<Char>();
346  for (int i = start_index; i < length; unescaped_length++) {
347  int step;
348  if (UnescapeChar(vector, i, length, &step) >
349  String::kMaxOneByteCharCode) {
350  one_byte = false;
351  }
352  i += step;
353  }
354  }
355 
356  DCHECK(start_index < length);
357  Handle<String> first_part =
358  isolate->factory()->NewProperSubString(string, 0, start_index);
359 
360  int dest_position = 0;
361  Handle<String> second_part;
362  DCHECK_LE(unescaped_length, String::kMaxLength);
363  if (one_byte) {
364  Handle<SeqOneByteString> dest = isolate->factory()
365  ->NewRawOneByteString(unescaped_length)
366  .ToHandleChecked();
367  DisallowHeapAllocation no_allocation;
368  Vector<const Char> vector = string->GetCharVector<Char>();
369  for (int i = start_index; i < length; dest_position++) {
370  int step;
371  dest->SeqOneByteStringSet(dest_position,
372  UnescapeChar(vector, i, length, &step));
373  i += step;
374  }
375  second_part = dest;
376  } else {
377  Handle<SeqTwoByteString> dest = isolate->factory()
378  ->NewRawTwoByteString(unescaped_length)
379  .ToHandleChecked();
380  DisallowHeapAllocation no_allocation;
381  Vector<const Char> vector = string->GetCharVector<Char>();
382  for (int i = start_index; i < length; dest_position++) {
383  int step;
384  dest->SeqTwoByteStringSet(dest_position,
385  UnescapeChar(vector, i, length, &step));
386  i += step;
387  }
388  second_part = dest;
389  }
390  return isolate->factory()->NewConsString(first_part, second_part);
391 }
392 
393 bool IsNotEscaped(uint16_t c) {
394  if (IsAlphaNumeric(c)) {
395  return true;
396  }
397  // @*_+-./
398  switch (c) {
399  case '@':
400  case '*':
401  case '_':
402  case '+':
403  case '-':
404  case '.':
405  case '/':
406  return true;
407  default:
408  return false;
409  }
410 }
411 
412 template <typename Char>
413 static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
414  Handle<String> source) {
415  int index;
416  {
417  DisallowHeapAllocation no_allocation;
418  StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
419  index = search.Search(source->GetCharVector<Char>(), 0);
420  if (index < 0) return source;
421  }
422  return UnescapeSlow<Char>(isolate, source, index);
423 }
424 
425 template <typename Char>
426 static MaybeHandle<String> EscapePrivate(Isolate* isolate,
427  Handle<String> string) {
428  DCHECK(string->IsFlat());
429  int escaped_length = 0;
430  int length = string->length();
431 
432  {
433  DisallowHeapAllocation no_allocation;
434  Vector<const Char> vector = string->GetCharVector<Char>();
435  for (int i = 0; i < length; i++) {
436  uint16_t c = vector[i];
437  if (c >= 256) {
438  escaped_length += 6;
439  } else if (IsNotEscaped(c)) {
440  escaped_length++;
441  } else {
442  escaped_length += 3;
443  }
444 
445  // We don't allow strings that are longer than a maximal length.
446  DCHECK_LT(String::kMaxLength, 0x7FFFFFFF - 6); // Cannot overflow.
447  if (escaped_length > String::kMaxLength) break; // Provoke exception.
448  }
449  }
450 
451  // No length change implies no change. Return original string if no change.
452  if (escaped_length == length) return string;
453 
454  Handle<SeqOneByteString> dest;
455  ASSIGN_RETURN_ON_EXCEPTION(
456  isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
457  String);
458  int dest_position = 0;
459 
460  {
461  DisallowHeapAllocation no_allocation;
462  Vector<const Char> vector = string->GetCharVector<Char>();
463  for (int i = 0; i < length; i++) {
464  uint16_t c = vector[i];
465  if (c >= 256) {
466  dest->SeqOneByteStringSet(dest_position, '%');
467  dest->SeqOneByteStringSet(dest_position + 1, 'u');
468  dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));
469  dest->SeqOneByteStringSet(dest_position + 3,
470  HexCharOfValue((c >> 8) & 0xF));
471  dest->SeqOneByteStringSet(dest_position + 4,
472  HexCharOfValue((c >> 4) & 0xF));
473  dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xF));
474  dest_position += 6;
475  } else if (IsNotEscaped(c)) {
476  dest->SeqOneByteStringSet(dest_position, c);
477  dest_position++;
478  } else {
479  dest->SeqOneByteStringSet(dest_position, '%');
480  dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));
481  dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xF));
482  dest_position += 3;
483  }
484  }
485  }
486 
487  return dest;
488 }
489 
490 } // Anonymous namespace
491 
492 MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {
493  Handle<String> result;
494  string = String::Flatten(isolate, string);
495  return string->IsOneByteRepresentationUnderneath()
496  ? EscapePrivate<uint8_t>(isolate, string)
497  : EscapePrivate<uc16>(isolate, string);
498 }
499 
500 MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {
501  Handle<String> result;
502  string = String::Flatten(isolate, string);
503  return string->IsOneByteRepresentationUnderneath()
504  ? UnescapePrivate<uint8_t>(isolate, string)
505  : UnescapePrivate<uc16>(isolate, string);
506 }
507 
508 } // namespace internal
509 } // namespace v8
Definition: libplatform.h:13