V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
unicode-decoder.cc
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 
6 #include "src/unicode-inl.h"
7 #include "src/unicode-decoder.h"
8 #include <stdio.h>
9 #include <stdlib.h>
10 
11 namespace unibrow {
12 
13 uint16_t Utf8Iterator::operator*() {
14  if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode)) {
15  return trailing_ ? Utf16::TrailSurrogate(char_)
16  : Utf16::LeadSurrogate(char_);
17  }
18 
19  DCHECK_EQ(trailing_, false);
20  return char_;
21 }
22 
23 Utf8Iterator& Utf8Iterator::operator++() {
24  if (V8_UNLIKELY(this->Done())) {
25  char_ = Utf8::kBufferEmpty;
26  return *this;
27  }
28 
29  if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode && !trailing_)) {
30  trailing_ = true;
31  return *this;
32  }
33 
34  trailing_ = false;
35  offset_ = cursor_;
36 
37  char_ =
38  Utf8::ValueOf(reinterpret_cast<const uint8_t*>(stream_.begin()) + cursor_,
39  stream_.length() - cursor_, &cursor_);
40  return *this;
41 }
42 
43 Utf8Iterator Utf8Iterator::operator++(int) {
44  Utf8Iterator old(*this);
45  ++*this;
46  return old;
47 }
48 
49 bool Utf8Iterator::Done() {
50  return offset_ == static_cast<size_t>(stream_.length());
51 }
52 
53 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
54  const v8::internal::Vector<const char>& stream) {
55  size_t utf16_length = 0;
56 
57  Utf8Iterator it = Utf8Iterator(stream);
58  // Loop until stream is read, writing to buffer as long as buffer has space.
59  while (utf16_length < buffer_length && !it.Done()) {
60  *buffer++ = *it;
61  ++it;
62  utf16_length++;
63  }
64  bytes_read_ = it.Offset();
65  trailing_ = it.Trailing();
66  chars_written_ = utf16_length;
67 
68  // Now that writing to buffer is done, we just need to calculate utf16_length
69  while (!it.Done()) {
70  ++it;
71  utf16_length++;
72  }
73  utf16_length_ = utf16_length;
74 }
75 
76 void Utf8DecoderBase::WriteUtf16Slow(
77  uint16_t* data, size_t length,
78  const v8::internal::Vector<const char>& stream, size_t offset,
79  bool trailing) {
80  Utf8Iterator it = Utf8Iterator(stream, offset, trailing);
81  while (!it.Done()) {
82  DCHECK_GT(length--, 0);
83  *data++ = *it;
84  ++it;
85  }
86 }
87 
88 } // namespace unibrow