66using System . Diagnostics ;
77using System . IO ;
88using System . Text ;
9+ using System . Text . Unicode ;
910using System . Threading ;
1011using System . Threading . Tasks ;
1112
1213namespace Microsoft . AspNetCore . Mvc . Formatters . Json
1314{
1415 internal sealed class TranscodingReadStream : Stream
1516 {
17+ private const int OverflowBufferSize = 4 ; // The maximum number of bytes used to represent a single UTF-8 encoded character
18+
1619 internal const int MaxByteBufferSize = 4096 ;
1720 internal const int MaxCharBufferSize = 3 * MaxByteBufferSize ;
18- private static readonly int MaxByteCountForUTF8Char = Encoding . UTF8 . GetMaxByteCount ( charCount : 1 ) ;
1921
2022 private readonly Stream _stream ;
21- private readonly Encoder _encoder ;
2223 private readonly Decoder _decoder ;
2324
2425 private ArraySegment < byte > _byteBuffer ;
@@ -48,19 +49,23 @@ public TranscodingReadStream(Stream input, Encoding sourceEncoding)
4849 count : 0 ) ;
4950
5051 _overflowBuffer = new ArraySegment < byte > (
51- ArrayPool < byte > . Shared . Rent ( MaxByteCountForUTF8Char ) ,
52+ ArrayPool < byte > . Shared . Rent ( OverflowBufferSize ) ,
5253 0 ,
5354 count : 0 ) ;
5455
55- _encoder = Encoding . UTF8 . GetEncoder ( ) ;
5656 _decoder = sourceEncoding . GetDecoder ( ) ;
5757 }
5858
5959 public override bool CanRead => true ;
6060 public override bool CanSeek => false ;
6161 public override bool CanWrite => false ;
6262 public override long Length => throw new NotSupportedException ( ) ;
63- public override long Position { get ; set ; }
63+
64+ public override long Position
65+ {
66+ get => throw new NotSupportedException ( ) ;
67+ set => throw new NotSupportedException ( ) ;
68+ }
6469
6570 internal int ByteBufferCount => _byteBuffer . Count ;
6671 internal int CharBufferCount => _charBuffer . Count ;
@@ -76,6 +81,11 @@ public override async Task<int> ReadAsync(byte[] buffer, int offset, int count,
7681 {
7782 ThrowArgumentOutOfRangeException ( buffer , offset , count ) ;
7883
84+ if ( count == 0 )
85+ {
86+ return 0 ;
87+ }
88+
7989 var readBuffer = new ArraySegment < byte > ( buffer , offset , count ) ;
8090
8191 if ( _overflowBuffer . Count > 0 )
@@ -90,76 +100,50 @@ public override async Task<int> ReadAsync(byte[] buffer, int offset, int count,
90100 return bytesToCopy ;
91101 }
92102
93- var totalBytes = 0 ;
94- bool encoderCompleted ;
95- int bytesEncoded ;
103+ if ( _charBuffer . Count == 0 )
104+ {
105+ // Only read more content from the input stream if we have exhausted all the buffered chars.
106+ await ReadInputChars ( cancellationToken ) ;
107+ }
108+
109+ var operationStatus = Utf8 . FromUtf16 ( _charBuffer , readBuffer , out var charsRead , out var bytesWritten , isFinalBlock : false ) ;
110+ _charBuffer = _charBuffer . Slice ( charsRead ) ;
96111
97- do
112+ switch ( operationStatus )
98113 {
99- // If we had left-over bytes from a previous read, move it to the start of the buffer and read content in to
100- // the segment that follows.
101- var eof = false ;
102- if ( _charBuffer . Count == 0 )
103- {
104- // Only read more content from the input stream if we have exhausted all the buffered chars.
105- eof = await ReadInputChars ( cancellationToken ) ;
106- }
107-
108- // We need to flush on the last write. This is true when we exhaust the input Stream and any buffered content.
109- var allContentRead = eof && _charBuffer . Count == 0 && _byteBuffer . Count == 0 ;
110-
111- if ( _charBuffer . Count > 0 && readBuffer . Count < MaxByteCountForUTF8Char && readBuffer . Count < Encoding . UTF8 . GetByteCount ( _charBuffer . AsSpan ( 0 , 1 ) ) )
112- {
113- // It's possible that the passed in buffer is smaller than the size required to encode a single
114- // char. For instance, the JsonSerializer may pass in a buffer of size 1 or 2 which
115- // is insufficient if the character requires more than 2 bytes to represent. In this case, read
116- // content in to an overflow buffer and fill up the passed in buffer.
117- _encoder . Convert (
118- _charBuffer ,
119- _overflowBuffer . Array ,
120- flush : false ,
121- out var charsUsed ,
122- out var bytesUsed ,
123- out _ ) ;
114+ case OperationStatus . Done :
115+ return bytesWritten ;
116+
117+ case OperationStatus . DestinationTooSmall :
118+ if ( bytesWritten != 0 )
119+ {
120+ return bytesWritten ;
121+ }
124122
125- _charBuffer = _charBuffer . Slice ( charsUsed ) ;
123+ // Overflow buffer is always empty when we get here and we can use its full length to write contents to.
124+ Utf8 . FromUtf16 ( _charBuffer , _overflowBuffer . Array , out var overFlowChars , out var overflowBytes , isFinalBlock : false ) ;
125+ Debug . Assert ( overflowBytes > 0 && overFlowChars > 0 , "We expect writes to the overflow buffer to always succeed since it is large enough to accomodate at least one char." ) ;
126+ Debug . Assert ( readBuffer . Count < overflowBytes ) ;
126127
127- Debug . Assert ( readBuffer . Count < bytesUsed ) ;
128+ _charBuffer = _charBuffer . Slice ( overFlowChars ) ;
128129 _overflowBuffer . Array . AsSpan ( 0 , readBuffer . Count ) . CopyTo ( readBuffer ) ;
129130
130131 _overflowBuffer = new ArraySegment < byte > (
131132 _overflowBuffer . Array ,
132133 readBuffer . Count ,
133- bytesUsed - readBuffer . Count ) ;
134-
135- totalBytes += readBuffer . Count ;
136- // At this point we're done writing.
137- break ;
138- }
139- else
140- {
141- _encoder . Convert (
142- _charBuffer ,
143- readBuffer ,
144- flush : allContentRead ,
145- out var charsUsed ,
146- out bytesEncoded ,
147- out encoderCompleted ) ;
148-
149- totalBytes += bytesEncoded ;
150- _charBuffer = _charBuffer . Slice ( charsUsed ) ;
151- readBuffer = readBuffer . Slice ( bytesEncoded ) ;
152- }
153-
154- // We need to exit in one of the 2 conditions:
155- // * encoderCompleted will return false if "buffer" was too small for all the chars to be encoded.
156- // * no bytes were converted in an iteration. This can occur if there wasn't any input.
157- } while ( encoderCompleted && bytesEncoded > 0 ) ;
158-
159- return totalBytes ;
134+ overflowBytes - readBuffer . Count ) ;
135+
136+ Debug . Assert ( _overflowBuffer . Count != 0 ) ;
137+
138+ return readBuffer . Count ;
139+
140+ default :
141+ Debug . Fail ( "We should never see this" ) ;
142+ throw new InvalidOperationException ( ) ;
143+ }
160144 }
161145
162- private async ValueTask < bool > ReadInputChars ( CancellationToken cancellationToken )
146+ private async Task ReadInputChars ( CancellationToken cancellationToken )
163147 {
164148 // If we had left-over bytes from a previous read, move them to the start of the buffer and read content into
165149 // the segment that follows.
@@ -184,15 +168,12 @@ private async ValueTask<bool> ReadInputChars(CancellationToken cancellationToken
184168 out _ ) ;
185169
186170 _byteBuffer = _byteBuffer . Slice ( bytesUsed ) ;
187-
188171 _charBuffer = new ArraySegment < char > ( _charBuffer . Array , 0 , charsUsed ) ;
189-
190- return readBytes == 0 ;
191172 }
192173
193174 private static void ThrowArgumentOutOfRangeException ( byte [ ] buffer , int offset , int count )
194175 {
195- if ( count <= 0 )
176+ if ( count < 0 )
196177 {
197178 throw new ArgumentOutOfRangeException ( nameof ( count ) ) ;
198179 }
0 commit comments