diff --git a/Sources/NIOCore/NIODecodedAsyncSequence.swift b/Sources/NIOCore/NIODecodedAsyncSequence.swift index c84105277e1..666498ae8a9 100644 --- a/Sources/NIOCore/NIODecodedAsyncSequence.swift +++ b/Sources/NIOCore/NIODecodedAsyncSequence.swift @@ -95,8 +95,8 @@ extension NIODecodedAsyncSequence: AsyncSequence { public struct AsyncIterator: AsyncIteratorProtocol { @usableFromInline enum State: Sendable { - case readingFromBuffer - case readLastChunkFromBuffer + case canReadFromBaseIterator + case baseIteratorIsExhausted case finishedDecoding } @@ -114,7 +114,7 @@ extension NIODecodedAsyncSequence: AsyncSequence { base.decoder, maximumBufferSize: base.maximumBufferSize ) - self.state = .readingFromBuffer + self.state = .canReadFromBaseIterator } /// Retrieve the next element from the ``NIODecodedAsyncSequence``. @@ -127,7 +127,7 @@ extension NIODecodedAsyncSequence: AsyncSequence { switch self.state { case .finishedDecoding: return nil - case .readingFromBuffer: + case .canReadFromBaseIterator: let (decoded, ended) = try self.processor.decodeNext( decodeMode: .normal, seenEOF: false @@ -144,11 +144,11 @@ extension NIODecodedAsyncSequence: AsyncSequence { // Read more data into the buffer so we can decode more messages guard let nextBuffer = try await self.baseIterator.next() else { // Ran out of data to read. 
- self.state = .readLastChunkFromBuffer + self.state = .baseIteratorIsExhausted continue } self.processor.append(nextBuffer) - case .readLastChunkFromBuffer: + case .baseIteratorIsExhausted: let (decoded, ended) = try self.processor.decodeNext( decodeMode: .last, seenEOF: true @@ -175,7 +175,7 @@ extension NIODecodedAsyncSequence: AsyncSequence { switch self.state { case .finishedDecoding: return nil - case .readingFromBuffer: + case .canReadFromBaseIterator: let (decoded, ended) = try self.processor.decodeNext( decodeMode: .normal, seenEOF: false @@ -192,11 +192,11 @@ extension NIODecodedAsyncSequence: AsyncSequence { // Read more data into the buffer so we can decode more messages guard let nextBuffer = try await self.baseIterator.next(isolation: actor) else { // Ran out of data to read. - self.state = .readLastChunkFromBuffer + self.state = .baseIteratorIsExhausted continue } self.processor.append(nextBuffer) - case .readLastChunkFromBuffer: + case .baseIteratorIsExhausted: let (decoded, ended) = try self.processor.decodeNext( decodeMode: .last, seenEOF: true diff --git a/Sources/NIOCore/NIOSplitLinesMessageDecoder.swift b/Sources/NIOCore/NIOSplitLinesMessageDecoder.swift new file mode 100644 index 00000000000..6d46e4c19b4 --- /dev/null +++ b/Sources/NIOCore/NIOSplitLinesMessageDecoder.swift @@ -0,0 +1,414 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftNIO open source project +// +// Copyright (c) 2025 Apple Inc. 
and the SwiftNIO project authors
+// Licensed under Apache License v2.0
+//
+// See LICENSE.txt for license information
+// See CONTRIBUTORS.txt for the list of SwiftNIO project authors
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+//===----------------------------------------------------------------------===//
+
+@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
+extension AsyncSequence where Element == ByteBuffer {
+    /// Returns the longest possible subsequences of the sequence, in order,
+    /// that are separated by line breaks.
+    ///
+    /// The following Characters are considered line breaks, similar to
+    /// standard library's `String.split(whereSeparator: \.isNewline)`:
+    /// - "\n" (U+000A): LINE FEED (LF)
+    /// - U+000B: LINE TABULATION (VT)
+    /// - U+000C: FORM FEED (FF)
+    /// - "\r" (U+000D): CARRIAGE RETURN (CR)
+    /// - "\r\n" (U+000D U+000A): CR-LF
+    ///
+    /// The following Characters are NOT considered line breaks, unlike in
+    /// standard library's `String.split(whereSeparator: \.isNewline)`:
+    /// - U+0085: NEXT LINE (NEL)
+    /// - U+2028: LINE SEPARATOR
+    /// - U+2029: PARAGRAPH SEPARATOR
+    ///
+    /// This is because these characters would require Unicode and data-encoding awareness, which
+    /// are outside swift-nio's scope.
+    ///
+    /// Usage:
+    /// ```swift
+    /// let baseSequence = MyAsyncSequence(...)
+    /// let splitLinesSequence = baseSequence.splitLines()
+    ///
+    /// for try await buffer in splitLinesSequence {
+    ///     print("Split by line breaks!\n", buffer.hexDump(format: .detailed))
+    /// }
+    /// ```
+    ///
+    /// - Parameters:
+    ///   - omittingEmptySubsequences: If `false`, an empty subsequence is
+    ///     returned in the result for each consecutive line break in the sequence.
+    ///     If `true`, only nonempty subsequences are returned. The default value is `true`.
+    ///   - maximumBufferSize: The maximum number of bytes to aggregate in-memory.
+    ///     An error will be thrown if after decoding an element there is more aggregated data than this amount.
+    /// - Returns: An `AsyncSequence` of ``ByteBuffer``s, split from this async sequence's bytes.
+    ///
+    /// - Complexity: O(*n*), where *n* is the total number of bytes in the sequence.
+    @inlinable
+    public func splitLines(
+        omittingEmptySubsequences: Bool = true,
+        maximumBufferSize: Int? = nil
+    ) -> NIODecodedAsyncSequence<Self, NIOSplitLinesMessageDecoder> {
+        self.decode(
+            using: NIOSplitLinesMessageDecoder(
+                omittingEmptySubsequences: omittingEmptySubsequences
+            ),
+            maximumBufferSize: maximumBufferSize
+        )
+    }
+
+    /// Returns the longest possible `String`s of the sequence, in order,
+    /// that are separated by line breaks.
+    ///
+    /// The following Characters are considered line breaks, similar to
+    /// standard library's `String.split(whereSeparator: \.isNewline)`:
+    /// - "\n" (U+000A): LINE FEED (LF)
+    /// - U+000B: LINE TABULATION (VT)
+    /// - U+000C: FORM FEED (FF)
+    /// - "\r" (U+000D): CARRIAGE RETURN (CR)
+    /// - "\r\n" (U+000D U+000A): CR-LF
+    ///
+    /// The following Characters are NOT considered line breaks, unlike in
+    /// standard library's `String.split(whereSeparator: \.isNewline)`:
+    /// - U+0085: NEXT LINE (NEL)
+    /// - U+2028: LINE SEPARATOR
+    /// - U+2029: PARAGRAPH SEPARATOR
+    ///
+    /// This is because these characters would require Unicode and data-encoding awareness, which
+    /// are outside swift-nio's scope.
+    ///
+    /// Usage:
+    /// ```swift
+    /// let baseSequence = MyAsyncSequence(...)
+    /// let splitLinesSequence = baseSequence.splitUTF8Lines()
+    ///
+    /// for try await string in splitLinesSequence {
+    ///     print("Split by line breaks!\n", string)
+    /// }
+    /// ```
+    ///
+    /// - Parameters:
+    ///   - omittingEmptySubsequences: If `false`, an empty subsequence is
+    ///     returned in the result for each consecutive line break in the sequence.
+    ///     If `true`, only nonempty subsequences are returned. The default value is `true`.
+    ///   - maximumBufferSize: The maximum number of bytes to aggregate in-memory.
+    ///     An error will be thrown if after decoding an element there is more aggregated data than this amount.
+    /// - Returns: An `AsyncSequence` of `String`s, split from this async sequence's bytes.
+    ///
+    /// - Complexity: O(*n*), where *n* is the total number of bytes in the sequence.
+    @inlinable
+    public func splitUTF8Lines(
+        omittingEmptySubsequences: Bool = true,
+        maximumBufferSize: Int? = nil
+    ) -> NIODecodedAsyncSequence<Self, NIOSplitUTF8LinesMessageDecoder> {
+        self.decode(
+            using: NIOSplitUTF8LinesMessageDecoder(
+                omittingEmptySubsequences: omittingEmptySubsequences
+            ),
+            maximumBufferSize: maximumBufferSize
+        )
+    }
+}
+
+// MARK: - SplitMessageDecoder
+
+/// A decoder which splits the data into subsequences that are separated by a given separator.
+/// Similar to standard library's `String.split(separator:maxSplits:omittingEmptySubsequences:)`.
+///
+/// This decoder can be used to introduce an `AsyncSequence/split(omittingEmptySubsequences:maximumBufferSize:whereSeparator:)`
+/// function. We could not come up with valid use-cases for such a function so we held off on introducing it.
+/// See https://github.com/apple/swift-nio/pull/3411 for more info if you need such a function.
+@usableFromInline
+struct SplitMessageDecoder: NIOSingleStepByteToMessageDecoder {
+    @usableFromInline
+    typealias InboundOut = ByteBuffer
+
+    @usableFromInline
+    let omittingEmptySubsequences: Bool
+    @usableFromInline
+    let isSeparator: (UInt8) -> Bool
+    @usableFromInline
+    var ended: Bool
+    @usableFromInline
+    var bytesWithNoSeparatorsCount: Int
+
+    @inlinable
+    init(
+        omittingEmptySubsequences: Bool = true,
+        whereSeparator isSeparator: @escaping (UInt8) -> Bool
+    ) {
+        self.omittingEmptySubsequences = omittingEmptySubsequences
+        self.isSeparator = isSeparator
+        self.ended = false
+        self.bytesWithNoSeparatorsCount = 0
+    }
+
+    /// Decode the next message from the given buffer.
+    @inlinable
+    mutating func decode(
+        buffer: inout ByteBuffer,
+        hasReceivedLastChunk: Bool
+    ) throws -> (buffer: InboundOut, separator: UInt8?)? {
+        if self.ended { return nil }
+
+        while true {
+            let startIndex = buffer.readerIndex + self.bytesWithNoSeparatorsCount
+            if let separatorIndex = buffer.readableBytesView[startIndex...].firstIndex(where: self.isSeparator) {
+                // Safe to force unwrap: `separatorIndex` lies inside the readable bytes, so the slice before it exists.
+                let slice = buffer.readSlice(length: separatorIndex - buffer.readerIndex)!
+                // Reset for the next search since we found a separator.
+                self.bytesWithNoSeparatorsCount = 0
+
+                if self.omittingEmptySubsequences,
+                    slice.readableBytes == 0
+                {
+                    // Skip the separator byte so the empty slice is dropped and the search resumes after it.
+                    buffer._moveReaderIndex(forwardBy: 1)
+                    continue
+                }
+
+                // Read the separator itself so the caller can see which byte ended this slice.
+                // Safe to force unwrap. We just found a separator somewhere in the buffer.
+                let separator = buffer.readInteger(as: UInt8.self)!
+
+                return (slice, separator)
+            } else {
+                guard hasReceivedLastChunk else {
+                    // Remember how many bytes were already scanned so the next call starts searching after them.
+                    self.bytesWithNoSeparatorsCount = buffer.readableBytes
+                    // Need more data
+                    return nil
+                }
+
+                // At this point, we're ending the decoding process; later calls return `nil` immediately.
+                self.ended = true
+
+                if self.omittingEmptySubsequences,
+                    buffer.readableBytes == 0
+                {
+                    return nil
+                }
+
+                // No separator is left and this is the last chunk: emit all remaining bytes with a `nil` separator.
+                // Safe to force unwrap. `buffer.readableBytes` is `0` in the worst case.
+                let slice = buffer.readSlice(length: buffer.readableBytes)!
+
+                return (slice, nil)
+            }
+        }
+    }
+
+    /// Decode the next message separated by the provided separator.
+    /// To be used when we're still receiving data.
+    @inlinable
+    mutating func decode(buffer: inout ByteBuffer) throws -> InboundOut? {
+        try self.decode(buffer: &buffer, hasReceivedLastChunk: false)?.buffer
+    }
+
+    /// Decode the next message separated by the provided separator.
+    /// To be used when the last chunk of data has been received. `seenEOF` is not consulted.
+    @inlinable
+    mutating func decodeLast(buffer: inout ByteBuffer, seenEOF: Bool) throws -> InboundOut? {
+        try self.decode(buffer: &buffer, hasReceivedLastChunk: true)?.buffer
+    }
+}
+
+@available(*, unavailable)
+extension SplitMessageDecoder: Sendable {}
+
+// MARK: - NIOSplitLinesMessageDecoder
+
+/// A decoder which splits the data into subsequences that are separated by line breaks.
+///
+/// You can initialize this type directly, or use
+/// `AsyncSequence/splitLines(omittingEmptySubsequences:maximumBufferSize:)` to create a
+/// `NIODecodedAsyncSequence` that uses this decoder.
+///
+/// The following Characters are considered line breaks, similar to
+/// standard library's `String.split(whereSeparator: \.isNewline)`:
+/// - "\n" (U+000A): LINE FEED (LF)
+/// - U+000B: LINE TABULATION (VT)
+/// - U+000C: FORM FEED (FF)
+/// - "\r" (U+000D): CARRIAGE RETURN (CR)
+/// - "\r\n" (U+000D U+000A): CR-LF
+///
+/// The following Characters are NOT considered line breaks, unlike in
+/// standard library's `String.split(whereSeparator: \.isNewline)`:
+/// - U+0085: NEXT LINE (NEL)
+/// - U+2028: LINE SEPARATOR
+/// - U+2029: PARAGRAPH SEPARATOR
+///
+/// This is because these characters would require unicode and data-encoding awareness, which
+/// are outside swift-nio's scope.
+///
+/// Usage:
+/// ```swift
+/// let baseSequence = MyAsyncSequence(...)
+/// let splitLinesSequence = baseSequence.splitLines()
+///
+/// for try await buffer in splitLinesSequence {
+///     print("Split by line breaks!\n", buffer.hexDump(format: .detailed))
+/// }
+/// ```
+public struct NIOSplitLinesMessageDecoder: NIOSingleStepByteToMessageDecoder {
+    public typealias InboundOut = ByteBuffer
+
+    @usableFromInline
+    var splitDecoder: SplitMessageDecoder
+    @usableFromInline
+    var previousSeparatorWasCR: Bool
+
+    @inlinable
+    public init(omittingEmptySubsequences: Bool) {
+        self.splitDecoder = SplitMessageDecoder(
+            omittingEmptySubsequences: omittingEmptySubsequences,
+            whereSeparator: Self.isLineBreak
+        )
+        self.previousSeparatorWasCR = false
+    }
+
+    /// - ASCII 10 - "\n" (U+000A): LINE FEED (LF)
+    /// - ASCII 11 - U+000B: LINE TABULATION (VT)
+    /// - ASCII 12 - U+000C: FORM FEED (FF)
+    /// - ASCII 13 - "\r" (U+000D): CARRIAGE RETURN (CR)
+    ///
+    /// Returns `true` for exactly these single bytes; the two-byte "\r\n" is accounted for during the decoding.
+    @inlinable
+    static func isLineBreak(_ byte: UInt8) -> Bool {
+        // All four line-break bytes are contiguous, from "\n" (10) through "\r" (13).
+        (UInt8(ascii: "\n")...UInt8(ascii: "\r")).contains(byte)
+    }
+
+    /// Decode the next line from the given buffer, folding a "\r\n" pair into a single line break.
+    @inlinable
+    mutating func decode(buffer: inout ByteBuffer, hasReceivedLastChunk: Bool) throws -> InboundOut? {
+        while true {
+            guard
+                let (slice, separator) = try self.splitDecoder.decode(
+                    buffer: &buffer,
+                    hasReceivedLastChunk: hasReceivedLastChunk
+                )
+            else {
+                return nil
+            }
+
+            // If we are getting rid of empty subsequences then it doesn't matter if we detect
+            // \r\n as a CR-LF, or as a CR + a LF. The backing decoder gets rid of the empty subsequence
+            // anyway. Therefore, we can return early right here and skip the rest of the logic.
+            if self.splitDecoder.omittingEmptySubsequences {
+                return slice
+            }
+
+            // "\r\n" is 2 bytes long, so we need to manually account for it.
+            switch separator {
+            case UInt8(ascii: "\n") where slice.readableBytes == 0:
+                let isCRLF = self.previousSeparatorWasCR
+                self.previousSeparatorWasCR = false
+                if isCRLF {
+                    continue
+                }
+            case UInt8(ascii: "\r"):
+                self.previousSeparatorWasCR = true
+            default:
+                self.previousSeparatorWasCR = false
+            }
+
+            return slice
+        }
+    }
+
+    /// Decode the next message separated by one of the ASCII line breaks.
+    /// To be used when we're still receiving data.
+    @inlinable
+    public mutating func decode(buffer: inout ByteBuffer) throws -> InboundOut? {
+        try self.decode(buffer: &buffer, hasReceivedLastChunk: false)
+    }
+
+    /// Decode the next message separated by one of the ASCII line breaks.
+    /// To be used when the last chunk of data has been received. `seenEOF` is not consulted.
+    @inlinable
+    public mutating func decodeLast(buffer: inout ByteBuffer, seenEOF: Bool) throws -> InboundOut? {
+        try self.decode(buffer: &buffer, hasReceivedLastChunk: true)
+    }
+}
+
+@available(*, unavailable)
+extension NIOSplitLinesMessageDecoder: Sendable {}
+
+// MARK: - NIOSplitUTF8LinesMessageDecoder
+
+/// A decoder which splits the data into `String`s that are separated by line breaks.
+///
+/// You can initialize this type directly, or use
+/// `AsyncSequence/splitUTF8Lines(omittingEmptySubsequences:maximumBufferSize:)` to create a
+/// `NIODecodedAsyncSequence` that uses this decoder.
+///
+/// The following Characters are considered line breaks, similar to
+/// standard library's `String.split(whereSeparator: \.isNewline)`:
+/// - "\n" (U+000A): LINE FEED (LF)
+/// - U+000B: LINE TABULATION (VT)
+/// - U+000C: FORM FEED (FF)
+/// - "\r" (U+000D): CARRIAGE RETURN (CR)
+/// - "\r\n" (U+000D U+000A): CR-LF
+///
+/// The following Characters are NOT considered line breaks, unlike in
+/// standard library's `String.split(whereSeparator: \.isNewline)`:
+/// - U+0085: NEXT LINE (NEL)
+/// - U+2028: LINE SEPARATOR
+/// - U+2029: PARAGRAPH SEPARATOR
+///
+/// This is because these characters would require Unicode and data-encoding awareness, which
+/// are outside swift-nio's scope.
+///
+/// Usage:
+/// ```swift
+/// let baseSequence = MyAsyncSequence(...)
+/// let splitLinesSequence = baseSequence.splitUTF8Lines()
+///
+/// for try await string in splitLinesSequence {
+///     print("Split by line breaks!\n", string)
+/// }
+/// ```
+public struct NIOSplitUTF8LinesMessageDecoder: NIOSingleStepByteToMessageDecoder {
+    public typealias InboundOut = String
+
+    @usableFromInline
+    var splitLinesDecoder: NIOSplitLinesMessageDecoder
+
+    @inlinable
+    public init(omittingEmptySubsequences: Bool) {
+        self.splitLinesDecoder = NIOSplitLinesMessageDecoder(
+            omittingEmptySubsequences: omittingEmptySubsequences
+        )
+    }
+
+    /// Decode the next line, separated by one of the ASCII line breaks, and convert it with `String(buffer:)`.
+    /// To be used when we're still receiving data.
+    @inlinable
+    public mutating func decode(buffer: inout ByteBuffer) throws -> InboundOut? {
+        try self.splitLinesDecoder.decode(buffer: &buffer, hasReceivedLastChunk: false).map {
+            String(buffer: $0)
+        }
+    }
+
+    /// Decode the next line, separated by one of the ASCII line breaks, and convert it with `String(buffer:)`.
+    /// To be used when the last chunk of data has been received. `seenEOF` is not consulted.
+    @inlinable
+    public mutating func decodeLast(buffer: inout ByteBuffer, seenEOF: Bool) throws -> InboundOut? {
+        try self.splitLinesDecoder.decode(buffer: &buffer, hasReceivedLastChunk: true).map {
+            String(buffer: $0)
+        }
+    }
+}
+
+@available(*, unavailable)
+extension NIOSplitUTF8LinesMessageDecoder: Sendable {}
diff --git a/Tests/NIOCoreTests/NIODecodedAsyncSequenceTests.swift b/Tests/NIOCoreTests/NIODecodedAsyncSequenceTests.swift
index ef73c7237c8..a22cd7fc5dc 100644
--- a/Tests/NIOCoreTests/NIODecodedAsyncSequenceTests.swift
+++ b/Tests/NIOCoreTests/NIODecodedAsyncSequenceTests.swift
@@ -65,16 +65,8 @@ struct NIODecodedAsyncSequenceTests {
             buffer.writeInteger(element)
         }
 
-        let baseSequence = AsyncStream.makeStream()
-        while buffer.readableBytes > 0 {
-            let length = min(buffer.readableBytes, chunkSize)
-            let _slice = buffer.readSlice(length: length)
-            let slice = try #require(_slice)
-            baseSequence.continuation.yield(slice)
-        }
-        baseSequence.continuation.finish()
-
-        let decodedSequence = baseSequence.stream.decode(using: ByteToInt32Decoder())
+        let stream = try self.makeFinishedAsyncStream(using: buffer, chunkSize: chunkSize)
+        let decodedSequence = stream.decode(using: ByteToInt32Decoder())
 
         for try await element in decodedSequence {
             #expect(element == randomElements.removeFirst())
@@ -93,16 +85,9 @@ struct NIODecodedAsyncSequenceTests {
             buffer.writeInteger(element)
         }
 
-        let baseSequence = AsyncStream.makeStream()
-        while buffer.readableBytes > 0 {
-            let length = min(buffer.readableBytes, chunkSize)
-            let _slice = buffer.readSlice(length: length)
-            let slice = try #require(_slice)
-            baseSequence.continuation.yield(slice)
-        }
-        baseSequence.continuation.finish()
+        let stream = try self.makeFinishedAsyncStream(using: buffer, chunkSize: chunkSize)
+        let decodedSequence = stream.decode(using: ThrowingDecoder())
 
-        let decodedSequence = baseSequence.stream.decode(using: ThrowingDecoder())
         await #expect(throws: ThrowingDecoder.DecoderError.self) {
             for try await _ in decodedSequence {
                 Issue.record("Should not have reached here")
@@ -125,4 +110,25 @@ struct
NIODecodedAsyncSequenceTests {
             }
         }
     }
+
+    private func makeFinishedAsyncStream(
+        using buffer: ByteBuffer,
+        chunkSize: Int
+    ) throws -> AsyncStream<ByteBuffer> {
+        var buffer = buffer
+        let sequence = AsyncStream<ByteBuffer>.makeStream()
+        while buffer.readableBytes > 0 {
+            if Int.random(in: 0..<4) == 0 {
+                // Roughly one iteration in four yields an empty buffer, so the decoder also sees zero-length chunks.
+                sequence.continuation.yield(ByteBuffer())
+                continue
+            }
+            let length = min(buffer.readableBytes, chunkSize)
+            let _slice = buffer.readSlice(length: length)
+            let slice = try #require(_slice)
+            sequence.continuation.yield(slice)
+        }
+        sequence.continuation.finish()
+        return sequence.stream
+    }
 }
diff --git a/Tests/NIOCoreTests/NIOSplitLinesMessageDecoderTests.swift b/Tests/NIOCoreTests/NIOSplitLinesMessageDecoderTests.swift
new file mode 100644
index 00000000000..ea61f91846f
--- /dev/null
+++ b/Tests/NIOCoreTests/NIOSplitLinesMessageDecoderTests.swift
@@ -0,0 +1,313 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the SwiftNIO open source project
+//
+// Copyright (c) 2025 Apple Inc.
and the SwiftNIO project authors
+// Licensed under Apache License v2.0
+//
+// See LICENSE.txt for license information
+// See CONTRIBUTORS.txt for the list of SwiftNIO project authors
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+//===----------------------------------------------------------------------===//
+
+import Testing
+
+@testable import NIOCore
+
+struct NIOSplitLinesMessageDecoderTests {
+    @Test(
+        arguments: SplittingTextTestArgument.allHardcodedArguments
+            + SplittingTextTestArgument.allProducedUsingSTDLibSplitFunction
+    )
+    private func splittingTextWorksCorrectly(argument: SplittingTextTestArgument) async throws {
+        let buffer = ByteBuffer(string: argument.text)
+        let stream = try self.makeFinishedAsyncStream(using: buffer, chunkSize: argument.chunkSize)
+
+        let decodedSequence = stream.decode(
+            using: SplitMessageDecoder(
+                omittingEmptySubsequences: argument.omittingEmptySubsequences,
+                whereSeparator: { $0 == argument.separator }
+            )
+        )
+
+        var producedElements = [ByteBuffer]()
+        producedElements.reserveCapacity(argument.expectedElements.count)
+        for try await element in decodedSequence {
+            producedElements.append(element)
+        }
+
+        #expect(
+            producedElements == argument.expectedElements.map(ByteBuffer.init),
+            """
+            Produced elements: \(producedElements.map(String.init(buffer:)).debugDescription)
+            Expected elements: \(argument.expectedElements.debugDescription)
+            """
+        )
+    }
+
+    @Test(
+        arguments: SplittingLinesTestArgument.allHardcodedArguments
+            + SplittingLinesTestArgument.allProducedUsingSTDLibSplitFunction
+    )
+    private func splittingLinesWorksCorrectly(argument: SplittingLinesTestArgument) async throws {
+        let buffer = ByteBuffer(string: argument.text)
+        let stream = try self.makeFinishedAsyncStream(using: buffer, chunkSize: argument.chunkSize)
+
+        let decodedSequence = stream.splitLines(
+            omittingEmptySubsequences: argument.omittingEmptySubsequences
+        )
+
+        var producedElements = [ByteBuffer]()
+        producedElements.reserveCapacity(argument.expectedElements.count)
+        for try await element in decodedSequence {
+            producedElements.append(element)
+        }
+
+        #expect(
+            producedElements == argument.expectedElements.map(ByteBuffer.init),
+            """
+            Produced elements: \(producedElements.map(String.init(buffer:)).debugDescription)
+            Expected elements: \(argument.expectedElements.debugDescription)
+            """
+        )
+    }
+
+    @Test(
+        arguments: SplittingLinesTestArgument.allHardcodedArguments
+            + SplittingLinesTestArgument.allProducedUsingSTDLibSplitFunction
+    )
+    private func splittingUTF8LinesWorksCorrectly(argument: SplittingLinesTestArgument) async throws {
+        let buffer = ByteBuffer(string: argument.text)
+        let stream = try self.makeFinishedAsyncStream(using: buffer, chunkSize: argument.chunkSize)
+
+        let decodedSequence = stream.splitUTF8Lines(
+            omittingEmptySubsequences: argument.omittingEmptySubsequences
+        )
+
+        var producedElements = [String]()
+        producedElements.reserveCapacity(argument.expectedElements.count)
+        for try await element in decodedSequence {
+            producedElements.append(element)
+        }
+
+        #expect(producedElements == argument.expectedElements)
+    }
+
+    private func makeFinishedAsyncStream(
+        using buffer: ByteBuffer,
+        chunkSize: Int
+    ) throws -> AsyncStream<ByteBuffer> {
+        var buffer = buffer
+        let sequence = AsyncStream<ByteBuffer>.makeStream()
+        while buffer.readableBytes > 0 {
+            if Int.random(in: 0..<4) == 0 {
+                // Roughly one iteration in four yields an empty buffer, so the decoder also sees zero-length chunks.
+                sequence.continuation.yield(ByteBuffer())
+                continue
+            }
+            let length = min(buffer.readableBytes, chunkSize)
+            let _slice = buffer.readSlice(length: length)
+            let slice = try #require(_slice)
+            sequence.continuation.yield(slice)
+        }
+        sequence.continuation.finish()
+        return sequence.stream
+    }
+}
+
+private struct SplittingTextTestArgument {
+    let text: String
+    let separator: UInt8
+    let omittingEmptySubsequences: Bool
+    let chunkSize: Int
+    let expectedElements: [String]
+
+    static let text = """
+           Here's to the   crazy    ones, th    e misfits, t he rebels, the troublemakers, \
+        the   round . pegs in the square holes, the ones who    see   thi    ngs
+        differently .
+        """
+    static let separators: [String] = ["a", ".", " ", "\n", ",", "r"]
+    static let chunkSizes: [Int] = [1, 2, 5, 8, 10, 15, 16, 20, 50, 100, 500]
+
+    static var allProducedUsingSTDLibSplitFunction: [Self] {
+        Self.separators.flatMap { separator -> [Self] in
+            [true, false].flatMap { omittingEmptySubsequences -> [Self] in
+                Self.chunkSizes.map { chunkSize -> Self in
+                    Self(
+                        text: Self.text,
+                        separator: separator.utf8.first!,
+                        omittingEmptySubsequences: omittingEmptySubsequences,
+                        chunkSize: chunkSize,
+                        expectedElements: Self.text.split(
+                            omittingEmptySubsequences: omittingEmptySubsequences,
+                            whereSeparator: { $0 == separator.first }
+                        ).map(String.init)
+                    )
+                }
+            }
+        }
+    }
+
+    /// These are hard-coded so the expectations do not rely solely on the standard library's `String.split` function.
+    /// They also let a reader eyeball a complete argument, including every run of consecutive spaces in `text`.
+    static var allHardcodedArguments: [Self] {
+        [
+            SplittingTextTestArgument(
+                text: Self.text,
+                separator: UInt8(ascii: " "),
+                omittingEmptySubsequences: true,
+                chunkSize: 50,
+                expectedElements: [
+                    "Here\'s",
+                    "to",
+                    "the",
+                    "crazy",
+                    "ones,",
+                    "th",
+                    "e",
+                    "misfits,",
+                    "t",
+                    "he",
+                    "rebels,",
+                    "the",
+                    "troublemakers,",
+                    "the",
+                    "round",
+                    ".",
+                    "pegs",
+                    "in",
+                    "the",
+                    "square",
+                    "holes,",
+                    "the",
+                    "ones",
+                    "who",
+                    "see",
+                    "thi",
+                    "ngs\ndifferently",
+                    ".",
+                ]
+            ),
+            SplittingTextTestArgument(
+                text: Self.text,
+                separator: UInt8(ascii: " "),
+                omittingEmptySubsequences: false,
+                chunkSize: 50,
+                expectedElements: [
+                    "", "", "",
+                    "Here\'s",
+                    "to",
+                    "the",
+                    "", "",
+                    "crazy",
+                    "", "", "",
+                    "ones,",
+                    "th",
+                    "", "", "",
+                    "e",
+                    "misfits,",
+                    "t",
+                    "he",
+                    "rebels,",
+                    "the",
+                    "troublemakers,",
+                    "the",
+                    "", "",
+                    "round",
+                    ".",
+                    "pegs",
+                    "in",
+                    "the",
+                    "square",
+                    "holes,",
+                    "the",
+                    "ones",
+                    "who",
+                    "", "", "",
+                    "see",
+                    "", "",
+                    "thi",
+                    "", "", "",
+                    "ngs\ndifferently",
+                    ".",
+                ]
+            ),
+        ]
+    }
+}
+
+private struct SplittingLinesTestArgument {
+    let text: String
+    let omittingEmptySubsequences: Bool
+    let chunkSize: Int
+    let expectedElements: [String]
+
+    /// A text with a lot of line breaks, both as escapes and as the literal's own newlines.
+    /// U+0009 and U+000E are not line breaks; they are included because they sit right
+    /// outside the line-break-byte boundaries (U+000A to U+000D).
+    static let text = """
+         Here's to the \n\r\n\u{000B}\n\r\u{000C}\n\r\n\r\n\r\n\n\r\n\r crazy ones, th e misfits, t he rebels,
+        the troublemakers, \u{0009} the \u{000C} round \u{000E}. pegs in \u{000B}\u{000C}\r\n\r\n\r\n\r\n\rthe square holes, the ones
+        who \r\n see thi \n ngs differently .\u{000B}
+        """
+    static let chunkSizes: [Int] = [1, 2, 5, 8, 10, 15, 16, 20, 50, 100, 500]
+
+    static var allProducedUsingSTDLibSplitFunction: [Self] {
+        [true, false].flatMap { omittingEmptySubsequences -> [Self] in
+            Self.chunkSizes.map { chunkSize -> Self in
+                Self(
+                    text: Self.text,
+                    omittingEmptySubsequences: omittingEmptySubsequences,
+                    chunkSize: chunkSize,
+                    expectedElements: Self.text.split(
+                        omittingEmptySubsequences: omittingEmptySubsequences,
+                        whereSeparator: \.isNewline
+                    ).map(String.init)
+                )
+            }
+        }
+    }
+
+    /// These are hard-coded so the expectations do not rely solely on the standard library's `String.split` function.
+    /// They also let a reader eyeball a complete argument if needed.
+    static var allHardcodedArguments: [Self] {
+        [
+            SplittingLinesTestArgument(
+                text: Self.text,
+                omittingEmptySubsequences: true,
+                chunkSize: 100,
+                expectedElements: [
+                    " Here\'s to the ",
+                    " crazy ones, th e misfits, t he rebels,",
+                    "the troublemakers, \t the ",
+                    " round \u{0E}. pegs in ",
+                    "the square holes, the ones",
+                    "who ",
+                    " see thi ",
+                    " ngs differently .",
+                ]
+            ),
+            SplittingLinesTestArgument(
+                text: Self.text,
+                omittingEmptySubsequences: false,
+                chunkSize: 100,
+                expectedElements: [
+                    " Here\'s to the ",
+                    "", "", "", "", "", "", "", "", "", "", "", "",
+                    " crazy ones, th e misfits, t he rebels,",
+                    "the troublemakers, \t the ",
+                    " round \u{0E}. pegs in ",
+                    "", "", "", "", "", "",
+                    "the square holes, the ones",
+                    "who ",
+                    " see thi ",
+                    " ngs differently .",
+                    "",
+                ]
+            ),
+        ]
+    }
+}