diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f0dafe1..b02a29a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed printing UTF-8 sequences that were read in multiple parts. (#468) + ### Changed ### Removed diff --git a/espflash/src/cli/monitor/mod.rs b/espflash/src/cli/monitor/mod.rs index 28f29052..b8b05de6 100644 --- a/espflash/src/cli/monitor/mod.rs +++ b/espflash/src/cli/monitor/mod.rs @@ -43,6 +43,7 @@ struct SerialContext<'ctx> { symbols: Option>, previous_frag: Option, previous_line: Option, + incomplete_utf8_buffer: Vec, } impl<'ctx> SerialContext<'ctx> { @@ -52,6 +53,40 @@ impl<'ctx> SerialContext<'ctx> { ..Self::default() } } + + fn process_utf8(&mut self, buff: &[u8]) -> String { + let mut buffer = std::mem::take(&mut self.incomplete_utf8_buffer); + buffer.extend(normalized(buff.iter().copied())); + + // look for longest slice that we can then lossily convert without introducing errors for + // partial sequences (#457) + let mut len = 0; + + loop { + match std::str::from_utf8(&buffer[len..]) { + // whole input is valid + Ok(str) if len == 0 => return String::from(str), + + // input is valid after the last error, and we could ignore the last error, so + // let's process the whole input + Ok(_) => return String::from_utf8_lossy(&buffer).to_string(), + + // input has some errors. We can ignore invalid sequences and replace them later, + // but we have to stop if we encounter an incomplete sequence. + Err(e) => { + len += e.valid_up_to(); + if let Some(error_len) = e.error_len() { + len += error_len; + } else { + // incomplete sequence. We split it off, save it for later + let (bytes, incomplete) = buffer.split_at(len); + self.incomplete_utf8_buffer = incomplete.to_vec(); + return String::from_utf8_lossy(bytes).to_string(); + } + } + } + } + } } /// Type that ensures that raw mode is disabled when dropped. @@ -144,8 +179,7 @@ pub fn monitor( /// Handles and writes the received serial data to the given output stream. fn handle_serial(ctx: &mut SerialContext, buff: &[u8], out: &mut dyn Write) { - let text: Vec = normalized(buff.iter().copied()).collect(); - let text = String::from_utf8_lossy(&text).to_string(); + let text = ctx.process_utf8(buff); // Split the text into lines, storing the last of which separately if it is // incomplete (ie. does not end with '\n') because these need special handling. @@ -278,3 +312,75 @@ fn handle_key_event(key_event: KeyEvent) -> Option> { key_str.map(|slice| slice.into()) } + +#[cfg(test)] +mod test { + #[test] + fn returns_valid_strings_immediately() { + let mut ctx = super::SerialContext::default(); + let buff = b"Hello, world!"; + let text = ctx.process_utf8(buff); + assert_eq!(text, "Hello, world!"); + } + + #[test] + fn does_not_repeat_valid_strings() { + let mut ctx = super::SerialContext::default(); + let text = ctx.process_utf8(b"Hello, world!"); + assert_eq!(text, "Hello, world!"); + let text = ctx.process_utf8(b"Something else"); + assert_eq!(text, "Something else"); + } + + #[test] + fn replaces_invalid_sequence() { + let mut ctx = super::SerialContext::default(); + let text = ctx.process_utf8(b"Hello, \xFF world!"); + assert_eq!(text, "Hello, \u{FFFD} world!"); + } + + #[test] + fn can_replace_unfinished_incomplete_sequence() { + let mut ctx = super::SerialContext::default(); + let mut incomplete = Vec::from("Hello, ".as_bytes()); + let utf8 = "🙈".as_bytes(); + incomplete.extend_from_slice(&utf8[..utf8.len() - 1]); + let text = ctx.process_utf8(&incomplete); + assert_eq!(text, "Hello, "); + + let text = ctx.process_utf8(b" world!"); + assert_eq!(text, "\u{FFFD} world!"); + } + + #[test] + fn can_merge_incomplete_sequence() { + let mut ctx = super::SerialContext::default(); + let mut incomplete = Vec::from("Hello, ".as_bytes()); + let utf8 = "🙈".as_bytes(); + incomplete.extend_from_slice(&utf8[..utf8.len() - 1]); + + let text = ctx.process_utf8(&incomplete); + assert_eq!(text, "Hello, "); + + let text = ctx.process_utf8(&utf8[utf8.len() - 1..]); + assert_eq!(text, "🙈"); + } + + #[test] + fn issue_457() { + let mut ctx = super::SerialContext::default(); + let mut result = String::new(); + + result.push_str(&ctx.process_utf8(&[0x48])); + result.push_str(&ctx.process_utf8(&[0x65, 0x6C, 0x6C])); + result.push_str(&ctx.process_utf8(&[ + 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x20, 0x77, 0x69, 0x74, + ])); + result.push_str(&ctx.process_utf8(&[ + 0x68, 0x20, 0x55, 0x54, 0x46, 0x3A, 0x20, 0x77, 0x79, 0x73, 0x79, + ])); + result.push_str(&ctx.process_utf8(&[0xC5, 0x82, 0x61, 0x6D, 0x0A])); + + assert_eq!(result, "Hello world! with UTF: wysyłam\r\n"); + } +}