/*
 * Copyright (C) 2016-2023 DiffPlug
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.diffplug.selfie

import com.diffplug.selfie.PerCharacterEscaper.Companion.selfEscape
import com.diffplug.selfie.PerCharacterEscaper.Companion.specifiedEscape
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFails
import kotlin.test.assertFailsWith
import kotlin.test.assertSame

/**
 * Platform-independent tests for [PerCharacterEscaper], covering both the self-escaping policy
 * (an escaped character is written as the escape character followed by itself) and the specified
 * policy (each escaped character is written as the escape character followed by a replacement).
 */
class PerCharacterEscaperTest {
  @Test
  fun performanceOptimizationSelf() {
    val escaper = selfEscape("`123")
    // if nothing gets changed, it should return the exact same instance
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`1", escaper.escape("1"))
    assertEquals("``", escaper.escape("`"))
    assertEquals("abc`1`2`3``def", escaper.escape("abc123`def"))

    // in both directions
    assertEquals("1", escaper.unescape("`1"))
    assertEquals("`", escaper.unescape("``"))
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun performanceOptimizationSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // if nothing gets changed, it should return the exact same instance
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`b", escaper.escape("1"))
    assertEquals("`a", escaper.escape("`"))
    assertEquals("abc`b`c`d`adef", escaper.escape("abc123`def"))

    // in both directions
    assertEquals("1", escaper.unescape("`b"))
    assertEquals("`", escaper.unescape("`a"))
    // the specified form is the exact inverse of the escape() assertion above
    assertEquals("abc123`def", escaper.unescape("abc`b`c`d`adef"))
    // characters that are not replacement codes pass through after the escape character
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun cornerCasesSelf() {
    val escaper = selfEscape("`123")
    // cornercase - escape character without follow-on must be rejected as a bad argument
    val exception = assertFailsWith<IllegalArgumentException> { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("a", escaper.unescape("`a"))
  }

  @Test
  fun cornerCasesSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // cornercase - escape character without follow-on must be rejected as a bad argument
    val exception = assertFailsWith<IllegalArgumentException> { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("e", escaper.unescape("`e"))
  }

  @Test
  fun roundtrip() {
    val escaper = selfEscape("`<>")
    val roundtrip = { str: String -> assertEquals(str, escaper.unescape(escaper.escape(str))) }
    roundtrip("")
    roundtrip("~`/")
    // exercises every character named in the policy
    roundtrip("<tag>`</tag>")
  }
}
/**
 * Escapes and unescapes text one Unicode code point at a time.
 *
 * If your escape policy is "'123", it means this:
 * ```
 * abc->abc
 * 123->'1'2'3
 * I won't->I won''t
 * ```
 *
 * Both [escape] and [unescape] return the input instance itself when nothing has to change.
 */
actual class PerCharacterEscaper
/**
 * Instances are created through [selfEscape] and [specifiedEscape], where the first character of
 * the policy string is used as the escape character.
 *
 * @param escapeCodePoint code point that introduces every escape sequence
 * @param escapedCodePoints code points that have to be escaped
 * @param escapedByCodePoints code point written after the escape character for the entry of
 *   [escapedCodePoints] at the same index
 */
private constructor(
    private val escapeCodePoint: Int,
    private val escapedCodePoints: IntArray,
    private val escapedByCodePoints: IntArray
) {
  // Left as public instance properties (rather than private companion constants) so that any
  // existing reference to them on an escaper instance keeps compiling.
  val MIN_SUPPLEMENTARY_CODE_POINT = 0x010000
  val MAX_CODE_POINT = 0X10FFFF
  val MIN_LOW_SURROGATE = '\uDC00'
  val MIN_HIGH_SURROGATE = '\uD800'

  /** Leading (high) surrogate of the UTF-16 pair that encodes a supplementary [codePoint]. */
  private fun highSurrogate(codePoint: Int): Char =
      ((codePoint ushr 10) + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10)).code)
          .toChar()

  /** Trailing (low) surrogate of the UTF-16 pair that encodes a supplementary [codePoint]. */
  private fun lowSurrogate(codePoint: Int): Char =
      ((codePoint and 0x3ff) + MIN_LOW_SURROGATE.code).toChar()

  /**
   * UTF-16 encoding of [codePoint]: one char for the BMP, a surrogate pair otherwise.
   *
   * @throws IllegalArgumentException if [codePoint] is outside the Unicode range
   */
  private fun toChars(codePoint: Int): CharArray =
      when {
        isBmpCodePoint(codePoint) -> charArrayOf(codePoint.toChar())
        isValidCodePoint(codePoint) ->
            charArrayOf(highSurrogate(codePoint), lowSurrogate(codePoint))
        else -> throw IllegalArgumentException("Not a valid Unicode code point: $codePoint")
      }

  private fun isBmpCodePoint(codePoint: Int): Boolean = codePoint ushr 16 == 0

  private fun isValidCodePoint(codePoint: Int): Boolean {
    // Optimized form of: codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT.
    // A negative value has its sign bit shifted into the plane, so it is rejected as well.
    val plane = codePoint ushr 16
    return plane < MAX_CODE_POINT + 1 ushr 16
  }

  /** Offset of the first code point that [escape] has to rewrite, or -1 if there is none. */
  private fun firstOffsetNeedingEscape(input: String): Int {
    var offset = 0
    while (offset < input.length) {
      val codepoint = codePointAt(input, offset)
      if (escapedCodePoints.indexOf(codepoint) != -1) {
        return offset
      }
      offset += charCount(codepoint)
    }
    return -1
  }

  /**
   * Replaces every escaped code point in [input] with the escape character followed by its
   * replacement.
   *
   * @return the escaped text, or [input] itself when it contains nothing to escape
   */
  actual fun escape(input: String): String {
    val firstEscape = firstOffsetNeedingEscape(input)
    if (firstEscape == -1) {
      return input
    }
    val length = input.length
    // Capacity guess: the untouched prefix plus some headroom for the growing remainder.
    val builder = StringBuilder(firstEscape + 4 + (length - firstEscape) * 5 / 4)
    builder.append(input, 0, firstEscape)
    var offset = firstEscape
    while (offset < length) {
      val codepoint = codePointAt(input, offset)
      offset += charCount(codepoint)
      val idx = escapedCodePoints.indexOf(codepoint)
      if (idx == -1) {
        builder.append(toChars(codepoint))
      } else {
        builder.append(toChars(escapeCodePoint))
        builder.append(toChars(escapedByCodePoints[idx]))
      }
    }
    return builder.toString()
  }

  /** Offset of the first escape character in [input], or -1 if there is none. */
  private fun firstOffsetNeedingUnescape(input: String): Int {
    var offset = 0
    while (offset < input.length) {
      val codepoint = codePointAt(input, offset)
      if (codepoint == escapeCodePoint) {
        return offset
      }
      offset += charCount(codepoint)
    }
    return -1
  }

  /**
   * Reverses [escape]. A code point that follows the escape character but is not a known
   * replacement is copied through unchanged.
   *
   * @return the unescaped text, or [input] itself when it contains no escape character
   * @throws IllegalArgumentException if [input] ends with the escape character
   */
  actual fun unescape(input: String): String {
    val firstEscape = firstOffsetNeedingUnescape(input)
    if (firstEscape == -1) {
      return input
    }
    val length = input.length
    val builder = StringBuilder(firstEscape + 4 + (length - firstEscape) * 5 / 4)
    builder.append(input, 0, firstEscape)
    var offset = firstEscape
    while (offset < length) {
      var codepoint = codePointAt(input, offset)
      offset += charCount(codepoint)
      if (codepoint == escapeCodePoint) {
        if (offset >= length) {
          throw IllegalArgumentException(
              "Escape character '" +
                  toChars(escapeCodePoint).concatToString() +
                  "' can't be the last character in a string.")
        }
        val following = codePointAt(input, offset)
        // Advance by the width of what was actually read; the code point it maps back to may
        // occupy a different number of chars.
        offset += charCount(following)
        val idx = escapedByCodePoints.indexOf(following)
        codepoint = if (idx == -1) following else escapedCodePoints[idx]
      }
      builder.append(toChars(codepoint))
    }
    return builder.toString()
  }

  actual companion object {
    /** Number of UTF-16 chars that encode [codePoint]: 2 for supplementary code points, else 1. */
    private fun charCount(codePoint: Int): Int = if (codePoint >= 0x010000) 2 else 1

    /** Code point of [value] starting at [offset], delegating to JavaScript's `codePointAt`. */
    private fun codePointAt(value: String, offset: Int): Int {
      val codePoint = js("value.codePointAt(offset)")
      return codePoint
    }

    /** Splits [value] into its code points, keeping each surrogate pair as a single entry. */
    private fun codePoints(value: String): IntArray {
      val result = mutableListOf<Int>()
      var offset = 0
      while (offset < value.length) {
        val codepoint = codePointAt(value, offset)
        result.add(codepoint)
        offset += charCount(codepoint)
      }
      return result.toIntArray()
    }

    /**
     * If your escape policy is "'123", it means this:
     * ```
     * abc->abc
     * 123->'1'2'3
     * I won't->I won''t
     * ```
     *
     * @throws IllegalArgumentException if [escapePolicy] is empty
     */
    actual fun selfEscape(escapePolicy: String): PerCharacterEscaper {
      // Array reads are not bounds-checked on Kotlin/JS, so an empty policy has to be rejected
      // explicitly instead of relying on an index error.
      require(escapePolicy.isNotEmpty()) {
        "Escape policy must start with the escape character, but it was empty."
      }
      val escapedCodePoints = codePoints(escapePolicy)
      return PerCharacterEscaper(escapedCodePoints[0], escapedCodePoints, escapedCodePoints)
    }

    /**
     * If your escape policy is "'a1b2c3d", it means this:
     * ```
     * abc->abc
     * 123->'b'c'd
     * I won't->I won'at
     * ```
     *
     * @throws IllegalArgumentException if [escapePolicy] is empty or does not consist of
     *   (escaped, replacement) character pairs
     */
    actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper {
      val codePoints = codePoints(escapePolicy)
      require(codePoints.isNotEmpty()) {
        "Escape policy must start with the escape character, but it was empty."
      }
      require(codePoints.size % 2 == 0) {
        "Escape policy must pair every escaped character with its replacement, " +
            "but it has ${codePoints.size} characters."
      }
      val escapedCodePoints = IntArray(codePoints.size / 2) { codePoints[2 * it] }
      val escapedByCodePoints = IntArray(codePoints.size / 2) { codePoints[2 * it + 1] }
      return PerCharacterEscaper(codePoints[0], escapedCodePoints, escapedByCodePoints)
    }
  }
}
- */ -package com.diffplug.selfie - -import com.diffplug.selfie.PerCharacterEscaper.Companion.selfEscape -import com.diffplug.selfie.PerCharacterEscaper.Companion.specifiedEscape -import java.util.function.Consumer -import org.junit.jupiter.api.Assertions -import org.junit.jupiter.api.Test - -class PerCharacterEscaperTest { - @Test - fun performanceOptimizationSelf() { - val escaper = selfEscape("`123") - // if nothing gets changed, it should return the exact same value - val abc = "abc" - Assertions.assertSame(abc, escaper.escape(abc)) - Assertions.assertSame(abc, escaper.unescape(abc)) - - // otherwise it should have the normal behavior - Assertions.assertEquals("`1", escaper.escape("1")) - Assertions.assertEquals("``", escaper.escape("`")) - Assertions.assertEquals("abc`1`2`3``def", escaper.escape("abc123`def")) - - // in both directions - Assertions.assertEquals("1", escaper.unescape("`1")) - Assertions.assertEquals("`", escaper.unescape("``")) - Assertions.assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def")) - } - - @Test - fun performanceOptimizationSpecific() { - val escaper = specifiedEscape("`a1b2c3d") - // if nothing gets changed, it should return the exact same value - val abc = "abc" - Assertions.assertSame(abc, escaper.escape(abc)) - Assertions.assertSame(abc, escaper.unescape(abc)) - - // otherwise it should have the normal behavior - Assertions.assertEquals("`b", escaper.escape("1")) - Assertions.assertEquals("`a", escaper.escape("`")) - Assertions.assertEquals("abc`b`c`d`adef", escaper.escape("abc123`def")) - - // in both directions - Assertions.assertEquals("1", escaper.unescape("`b")) - Assertions.assertEquals("`", escaper.unescape("`a")) - Assertions.assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def")) - } - - @Test - fun cornerCasesSelf() { - val escaper = selfEscape("`123") - // cornercase - escape character without follow-on will throw an error - val exception = - 
Assertions.assertThrows(java.lang.IllegalArgumentException::class.java) { - escaper.unescape("`") - } - Assertions.assertEquals( - "Escape character '`' can't be the last character in a string.", exception.message) - // escape character followed by non-escape character is fine - Assertions.assertEquals("a", escaper.unescape("`a")) - } - - @Test - fun cornerCasesSpecific() { - val escaper = specifiedEscape("`a1b2c3d") - // cornercase - escape character without follow-on will throw an error - val exception = - Assertions.assertThrows(java.lang.IllegalArgumentException::class.java) { - escaper.unescape("`") - } - Assertions.assertEquals( - "Escape character '`' can't be the last character in a string.", exception.message) - // escape character followed by non-escape character is fine - Assertions.assertEquals("e", escaper.unescape("`e")) - } - - @Test - fun roundtrip() { - val escaper = selfEscape("`<>") - val roundtrip = Consumer { str: String? -> - Assertions.assertEquals(str, escaper.unescape(escaper.escape(str!!))) - } - roundtrip.accept("") - roundtrip.accept("~`/") - } -}