@@ -15,6 +15,7 @@ void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
1515 // Assume everything will fit in the buffer and stream won't be needed.
1616 last_byte_of_buffer_unused_ = false ;
1717 unbuffered_start_ = NULL ;
18+ unbuffered_length_ = 0 ;
1819 bool writing_to_buffer = true ;
1920 // Loop until stream is read, writing to buffer as long as buffer has space.
2021 size_t utf16_length = 0 ;
@@ -41,6 +42,7 @@ void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
4142 // Just wrote last character of buffer
4243 writing_to_buffer = false ;
4344 unbuffered_start_ = stream;
45+ unbuffered_length_ = stream_length;
4446 }
4547 continue ;
4648 }
@@ -50,19 +52,22 @@ void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
5052 writing_to_buffer = false ;
5153 last_byte_of_buffer_unused_ = true ;
5254 unbuffered_start_ = stream - cursor;
55+ unbuffered_length_ = stream_length + cursor;
5356 }
5457 utf16_length_ = utf16_length;
5558}
5659
5760
58- void Utf8DecoderBase::WriteUtf16Slow (const uint8_t * stream, uint16_t * data,
61+ void Utf8DecoderBase::WriteUtf16Slow (const uint8_t * stream,
62+ size_t stream_length, uint16_t * data,
5963 size_t data_length) {
6064 while (data_length != 0 ) {
6165 size_t cursor = 0 ;
62- uint32_t character = Utf8::ValueOf (stream, Utf8:: kMaxEncodedSize , &cursor);
66+ uint32_t character = Utf8::ValueOf (stream, stream_length , &cursor);
6367 // There's a total lack of bounds checking for stream
6468 // as it was already done in Reset.
6569 stream += cursor;
70+ stream_length -= cursor;
6671 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode ) {
6772 *data++ = Utf16::LeadSurrogate (character);
6873 *data++ = Utf16::TrailSurrogate (character);
@@ -73,6 +78,7 @@ void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
7378 data_length -= 1 ;
7479 }
7580 }
81+ DCHECK (stream_length >= 0 );
7682}
7783
7884} // namespace unibrow
0 commit comments