5 #include "src/inspector/string-16.h" 14 #include "src/base/platform/platform.h" 15 #include "src/conversions.h" 21 bool isASCII(UChar c) {
return !(c & ~0x7F); }
23 bool isSpaceOrNewLine(UChar c) {
24 return isASCII(c) && c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
27 int64_t charactersToInteger(
const UChar* characters,
size_t length,
29 std::vector<char> buffer;
30 buffer.reserve(length + 1);
31 for (
size_t i = 0;
i < length; ++
i) {
32 if (!isASCII(characters[
i])) {
36 buffer.push_back(static_cast<char>(characters[
i]));
38 buffer.push_back(
'\0');
42 static_cast<int64_t>(std::strtoll(buffer.data(), &endptr, 10));
43 if (ok) *ok = !(*endptr);
47 const UChar replacementCharacter = 0xFFFD;
// Returns the total length (2, 3 or 4) of the UTF-8 sequence introduced by
// the non-ASCII lead byte |b0|, or 0 when |b0| cannot start a sequence
// (a continuation byte, or a lead byte announcing more than four bytes).
inline int inlineUTF8SequenceLengthNonASCII(char b0) {
  if ((b0 & 0xC0) != 0xC0) return 0;
  if ((b0 & 0xE0) == 0xC0) return 2;
  if ((b0 & 0xF0) == 0xE0) return 3;
  if ((b0 & 0xF8) == 0xF0) return 4;
  return 0;
}
58 inline int inlineUTF8SequenceLength(
char b0) {
59 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
// Mask OR-ed into the first byte of an encoded UTF-8 sequence, indexed by the
// total number of bytes in the sequence. Index 0 is unused and index 1
// (single-byte ASCII) needs no marker bits.
static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0,
                                               0xF0, 0xF8, 0xFC};
77 ConversionResult convertUTF16ToUTF8(
const UChar** sourceStart,
78 const UChar* sourceEnd,
char** targetStart,
79 char* targetEnd,
bool strict) {
80 ConversionResult result = conversionOK;
81 const UChar* source = *sourceStart;
82 char* target = *targetStart;
83 while (source < sourceEnd) {
86 const UChar32 byteMask = 0xBF;
87 const UChar32 byteMark = 0x80;
88 const UChar* oldSource =
90 ch =
static_cast<uint16_t
>(*source++);
92 if (ch >= 0xD800 && ch <= 0xDBFF) {
94 if (source < sourceEnd) {
95 UChar32 ch2 =
static_cast<uint16_t
>(*source);
97 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
98 ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
102 result = sourceIllegal;
107 result = sourceExhausted;
112 if (ch >= 0xDC00 && ch <= 0xDFFF) {
114 result = sourceIllegal;
119 if (ch < static_cast<UChar32>(0x80)) {
121 }
else if (ch < static_cast<UChar32>(0x800)) {
123 }
else if (ch < static_cast<UChar32>(0x10000)) {
125 }
else if (ch < static_cast<UChar32>(0x110000)) {
129 ch = replacementCharacter;
132 target += bytesToWrite;
133 if (target > targetEnd) {
135 target -= bytesToWrite;
136 result = targetExhausted;
139 switch (bytesToWrite) {
141 *--target =
static_cast<char>((ch | byteMark) & byteMask);
145 *--target =
static_cast<char>((ch | byteMark) & byteMask);
149 *--target =
static_cast<char>((ch | byteMark) & byteMask);
153 *--target =
static_cast<char>(ch | firstByteMark[bytesToWrite]);
155 target += bytesToWrite;
157 *sourceStart = source;
158 *targetStart = target;
// True when code point |c| lies in the Basic Multilingual Plane (<= U+FFFF).
#define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF)

// True when code point |c| lies in U+10000..U+10FFFF.
#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF)

// True when |c| is a surrogate code point (U+D800..U+DFFF).
#define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800)

// Lead (high) surrogate for a supplementary code point.
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0)

// Trail (low) surrogate for a supplementary code point.
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00)

// Checks that the |length| bytes at |source| form one well-formed UTF-8
// sequence. |length| is the sequence length implied by the lead byte; values
// outside 1..4 are rejected without reading |source|.
//
// Rejected: overlong encodings (lead 0xC0/0xC1, 0xE0 followed by < 0xA0,
// 0xF0 followed by < 0x90), encoded surrogates (0xED followed by > 0x9F),
// values above U+10FFFF (0xF4 followed by > 0x8F, or lead > 0xF4), and any
// trailing byte outside 0x80..0xBF.
static bool isLegalUTF8(const unsigned char* source, int length) {
  if (length < 1 || length > 4) return false;
  const unsigned char lead = source[0];

  // Third and fourth bytes (when present) must be plain continuation bytes.
  for (int i = length - 1; i >= 2; --i) {
    if (source[i] < 0x80 || source[i] > 0xBF) return false;
  }

  if (length >= 2) {
    const unsigned char second = source[1];
    // The second byte is always a continuation byte. The lower bound must be
    // enforced for every lead byte, including 0xED and 0xF4 whose special
    // cases below only tighten the upper bound.
    if (second < 0x80 || second > 0xBF) return false;
    switch (lead) {
      case 0xE0:
        if (second < 0xA0) return false;
        break;
      case 0xED:
        if (second > 0x9F) return false;
        break;
      case 0xF0:
        if (second < 0x90) return false;
        break;
      case 0xF4:
        if (second > 0x8F) return false;
        break;
      default:
        break;
    }
  }

  // 0x80..0xBF are continuation bytes and 0xC0/0xC1 only start overlong
  // two-byte forms; none of them may lead a sequence.
  if (lead >= 0x80 && lead < 0xC2) return false;
  return lead <= 0xF4;
}
252 static const UChar32 offsetsFromUTF8[6] = {0x00000000UL,
256 static_cast<UChar32
>(0xFA082080UL),
257 static_cast<UChar32>(0x82082080UL)};
259 static inline UChar32 readUTF8Sequence(
const char*& sequence,
size_t length) {
260 UChar32 character = 0;
265 character +=
static_cast<unsigned char>(*sequence++);
269 character +=
static_cast<unsigned char>(*sequence++);
273 character +=
static_cast<unsigned char>(*sequence++);
277 character +=
static_cast<unsigned char>(*sequence++);
281 character +=
static_cast<unsigned char>(*sequence++);
285 character +=
static_cast<unsigned char>(*sequence++);
288 return character - offsetsFromUTF8[length - 1];
291 ConversionResult convertUTF8ToUTF16(
const char** sourceStart,
292 const char* sourceEnd, UChar** targetStart,
293 UChar* targetEnd,
bool* sourceAllASCII,
295 ConversionResult result = conversionOK;
296 const char* source = *sourceStart;
297 UChar* target = *targetStart;
299 while (source < sourceEnd) {
300 int utf8SequenceLength = inlineUTF8SequenceLength(*source);
301 if (sourceEnd - source < utf8SequenceLength) {
302 result = sourceExhausted;
306 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source),
307 utf8SequenceLength)) {
308 result = sourceIllegal;
312 UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
314 if (target >= targetEnd) {
315 source -= utf8SequenceLength;
316 result = targetExhausted;
320 if (U_IS_BMP(character)) {
322 if (U_IS_SURROGATE(character)) {
324 source -= utf8SequenceLength;
325 result = sourceIllegal;
328 *target++ = replacementCharacter;
329 orAllData |= replacementCharacter;
331 *target++ =
static_cast<UChar
>(character);
332 orAllData |= character;
334 }
else if (U_IS_SUPPLEMENTARY(character)) {
336 if (target + 1 >= targetEnd) {
337 source -= utf8SequenceLength;
338 result = targetExhausted;
341 *target++ = U16_LEAD(character);
342 *target++ = U16_TRAIL(character);
346 source -= utf8SequenceLength;
347 result = sourceIllegal;
350 *target++ = replacementCharacter;
351 orAllData |= replacementCharacter;
355 *sourceStart = source;
356 *targetStart = target;
358 if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F);
365 static inline void putUTF8Triple(
char*& buffer, UChar ch) {
366 *buffer++ =
static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
367 *buffer++ =
static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
368 *buffer++ =
static_cast<char>((ch & 0x3F) | 0x80);
373 String16::String16() =
default;
375 String16::String16(
const String16& other) =
default;
377 String16::String16(String16&& other) V8_NOEXCEPT
378 : m_impl(std::move(other.m_impl)),
379 hash_code(other.hash_code) {}
381 String16::String16(
const UChar* characters,
size_t size)
382 : m_impl(characters, size) {}
384 String16::String16(
const UChar* characters) : m_impl(characters) {}
386 String16::String16(
const char* characters)
387 : String16(characters,
std::strlen(characters)) {}
389 String16::String16(
const char* characters,
size_t size) {
391 for (
size_t i = 0;
i < size; ++
i) m_impl[
i] = characters[
i];
394 String16::String16(
const std::basic_string<UChar>& impl) : m_impl(impl) {}
396 String16& String16::operator=(
const String16& other) =
default;
398 String16& String16::operator=(String16&& other) V8_NOEXCEPT {
399 m_impl = std::move(other.m_impl);
400 hash_code = other.hash_code;
405 String16 String16::fromInteger(
int number) {
408 return String16(IntToCString(number, buffer));
412 String16 String16::fromInteger(
size_t number) {
413 const size_t kBufferSize = 50;
414 char buffer[kBufferSize];
415 #if !defined(_WIN32) && !defined(_WIN64) 416 v8::base::OS::SNPrintF(buffer, kBufferSize,
"%zu", number);
418 v8::base::OS::SNPrintF(buffer, kBufferSize,
"%Iu", number);
420 return String16(buffer);
424 String16 String16::fromDouble(
double number) {
427 return String16(DoubleToCString(number, buffer));
431 String16 String16::fromDouble(
double number,
int precision) {
432 std::unique_ptr<char[]> str(
433 v8::internal::DoubleToPrecisionCString(number, precision));
434 return String16(str.get());
437 int64_t String16::toInteger64(
bool* ok)
const {
438 return charactersToInteger(characters16(), length(), ok);
441 int String16::toInteger(
bool* ok)
const {
442 int64_t result = toInteger64(ok);
444 *ok = result <= std::numeric_limits<int>::max() &&
445 result >= std::numeric_limits<int>::min();
447 return static_cast<int>(result);
450 String16 String16::stripWhiteSpace()
const {
451 if (!length())
return String16();
454 size_t end = length() - 1;
457 while (start <= end && isSpaceOrNewLine(characters16()[start])) ++start;
460 if (start > end)
return String16();
463 while (end && isSpaceOrNewLine(characters16()[end])) --end;
465 if (!start && end == length() - 1)
return *
this;
466 return String16(characters16() + start, end + 1 - start);
469 String16Builder::String16Builder() =
default;
471 void String16Builder::append(
const String16& s) {
472 m_buffer.insert(m_buffer.end(), s.characters16(),
473 s.characters16() + s.length());
476 void String16Builder::append(UChar c) { m_buffer.push_back(c); }
478 void String16Builder::append(
char c) {
480 m_buffer.push_back(u);
483 void String16Builder::append(
const UChar* characters,
size_t length) {
484 m_buffer.insert(m_buffer.end(), characters, characters + length);
487 void String16Builder::append(
const char* characters,
size_t length) {
488 m_buffer.insert(m_buffer.end(), characters, characters + length);
491 void String16Builder::appendNumber(
int number) {
492 constexpr
int kBufferSize = 11;
493 char buffer[kBufferSize];
494 int chars = v8::base::OS::SNPrintF(buffer, kBufferSize,
"%d", number);
496 m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
499 void String16Builder::appendNumber(
size_t number) {
500 constexpr
int kBufferSize = 20;
501 char buffer[kBufferSize];
502 #if !defined(_WIN32) && !defined(_WIN64) 503 int chars = v8::base::OS::SNPrintF(buffer, kBufferSize,
"%zu", number);
505 int chars = v8::base::OS::SNPrintF(buffer, kBufferSize,
"%Iu", number);
508 m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
511 void String16Builder::appendUnsignedAsHex(uint64_t number) {
512 constexpr
int kBufferSize = 17;
513 char buffer[kBufferSize];
515 v8::base::OS::SNPrintF(buffer, kBufferSize,
"%016" PRIx64, number);
517 m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
520 void String16Builder::appendUnsignedAsHex(
uint32_t number) {
521 constexpr
int kBufferSize = 9;
522 char buffer[kBufferSize];
523 int chars = v8::base::OS::SNPrintF(buffer, kBufferSize,
"%08" PRIx32, number);
525 m_buffer.insert(m_buffer.end(), buffer, buffer + chars);
528 String16 String16Builder::toString() {
529 return String16(m_buffer.data(), m_buffer.size());
532 void String16Builder::reserveCapacity(
size_t capacity) {
533 m_buffer.reserve(capacity);
536 String16 String16::fromUTF8(
const char* stringStart,
size_t length) {
537 if (!stringStart || !length)
return String16();
539 std::vector<UChar> buffer(length);
540 UChar* bufferStart = buffer.data();
542 UChar* bufferCurrent = bufferStart;
543 const char* stringCurrent = stringStart;
544 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
545 bufferCurrent + buffer.size(),
nullptr,
546 true) != conversionOK)
549 size_t utf16Length = bufferCurrent - bufferStart;
550 return String16(bufferStart, utf16Length);
553 std::string String16::utf8()
const {
554 size_t length = this->length();
556 if (!length)
return std::string(
"");
568 if (length > std::numeric_limits<unsigned>::max() / 3)
return std::string();
569 std::vector<char> bufferVector(length * 3);
570 char* buffer = bufferVector.data();
571 const UChar* characters = m_impl.data();
573 ConversionResult result =
574 convertUTF16ToUTF8(&characters, characters + length, &buffer,
575 buffer + bufferVector.size(),
false);
581 DCHECK(result != sourceIllegal);
584 if (result == sourceExhausted) {
589 DCHECK((characters + 1) == (m_impl.data() + length));
590 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF));
593 DCHECK((buffer + 3) <= (buffer + bufferVector.size()));
594 putUTF8Triple(buffer, *characters);
597 return std::string(bufferVector.data(), buffer - bufferVector.data());