diff --git a/Sources/FoundationEssentials/String/IANACharsetNames.swift b/Sources/FoundationEssentials/String/IANACharsetNames.swift
new file mode 100644
index 000000000..8f3e88f09
--- /dev/null
+++ b/Sources/FoundationEssentials/String/IANACharsetNames.swift
@@ -0,0 +1,213 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+
+// WARNING: DO NOT EDIT THIS FILE DIRECTLY.
+// This is auto-generated by `update-iana-charset-names`.
+
+
+extension IANACharset {
+    /// IANA Charset `US-ASCII`.
+    static let usASCII = IANACharset(
+        preferredMIMEName: "US-ASCII",
+        name: "US-ASCII",
+        aliases: [
+            "iso-ir-6",
+            "ANSI_X3.4-1968",
+            "ANSI_X3.4-1986",
+            "ISO_646.irv:1991",
+            "ISO646-US",
+            "US-ASCII",
+            "us",
+            "IBM367",
+            "cp367",
+            "csASCII",
+        ]
+    )
+
+    /// IANA Charset `ISO-8859-1`.
+    static let iso8859_1 = IANACharset(
+        preferredMIMEName: "ISO-8859-1",
+        name: "ISO_8859-1:1987",
+        aliases: [
+            "iso-ir-100",
+            "ISO_8859-1",
+            "ISO-8859-1",
+            "latin1",
+            "l1",
+            "IBM819",
+            "CP819",
+            "csISOLatin1",
+        ]
+    )
+
+    /// IANA Charset `ISO-8859-2`.
+    static let iso8859_2 = IANACharset(
+        preferredMIMEName: "ISO-8859-2",
+        name: "ISO_8859-2:1987",
+        aliases: [
+            "iso-ir-101",
+            "ISO_8859-2",
+            "ISO-8859-2",
+            "latin2",
+            "l2",
+            "csISOLatin2",
+        ]
+    )
+
+    /// IANA Charset `Shift_JIS`.
+    static let shiftJIS = IANACharset(
+        preferredMIMEName: "Shift_JIS",
+        name: "Shift_JIS",
+        aliases: [
+            "MS_Kanji",
+            "csShiftJIS",
+        ]
+    )
+
+    /// IANA Charset `EUC-JP`.
+    static let eucJP = IANACharset(
+        preferredMIMEName: "EUC-JP",
+        name: "Extended_UNIX_Code_Packed_Format_for_Japanese",
+        aliases: [
+            "csEUCPkdFmtJapanese",
+            "EUC-JP",
+        ]
+    )
+
+    /// IANA Charset `ISO-2022-JP`.
+    static let iso2022JP = IANACharset(
+        preferredMIMEName: "ISO-2022-JP",
+        name: "ISO-2022-JP",
+        aliases: [
+            "csISO2022JP",
+        ]
+    )
+
+    /// IANA Charset `UTF-8`.
+    static let utf8 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-8",
+        aliases: [
+            "csUTF8",
+        ]
+    )
+
+    /// IANA Charset `UTF-16BE`.
+    static let utf16BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16BE",
+        aliases: [
+            "csUTF16BE",
+        ]
+    )
+
+    /// IANA Charset `UTF-16LE`.
+    static let utf16LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16LE",
+        aliases: [
+            "csUTF16LE",
+        ]
+    )
+
+    /// IANA Charset `UTF-16`.
+    static let utf16 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-16",
+        aliases: [
+            "csUTF16",
+        ]
+    )
+
+    /// IANA Charset `UTF-32`.
+    static let utf32 = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32",
+        aliases: [
+            "csUTF32",
+        ]
+    )
+
+    /// IANA Charset `UTF-32BE`.
+    static let utf32BE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32BE",
+        aliases: [
+            "csUTF32BE",
+        ]
+    )
+
+    /// IANA Charset `UTF-32LE`.
+    static let utf32LE = IANACharset(
+        preferredMIMEName: nil,
+        name: "UTF-32LE",
+        aliases: [
+            "csUTF32LE",
+        ]
+    )
+
+    /// IANA Charset `macintosh`.
+    static let macintosh = IANACharset(
+        preferredMIMEName: nil,
+        name: "macintosh",
+        aliases: [
+            "mac",
+            "csMacintosh",
+        ]
+    )
+
+    /// IANA Charset `windows-1250`.
+    static let windows1250 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1250",
+        aliases: [
+            "cswindows1250",
+        ]
+    )
+
+    /// IANA Charset `windows-1251`.
+    static let windows1251 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1251",
+        aliases: [
+            "cswindows1251",
+        ]
+    )
+
+    /// IANA Charset `windows-1252`.
+    static let windows1252 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1252",
+        aliases: [
+            "cswindows1252",
+        ]
+    )
+
+    /// IANA Charset `windows-1253`.
+    static let windows1253 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1253",
+        aliases: [
+            "cswindows1253",
+        ]
+    )
+
+    /// IANA Charset `windows-1254`.
+    static let windows1254 = IANACharset(
+        preferredMIMEName: nil,
+        name: "windows-1254",
+        aliases: [
+            "cswindows1254",
+        ]
+    )
+}
diff --git a/Sources/FoundationEssentials/String/String+Encoding+Names.swift b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
new file mode 100644
index 000000000..ba2cc32ef
--- /dev/null
+++ b/Sources/FoundationEssentials/String/String+Encoding+Names.swift
@@ -0,0 +1,287 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+
+// MARK: - Private extensions for parsing encoding names
+
+private extension Unicode.Scalar {
+    /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
+    ///
+    /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
+    var _isASCIIWhitespace: Bool {
+        switch self.value {
+        case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true
+        default: false
+        }
+    }
+}
+
+private extension String {
+    var _trimmed: Substring.UnicodeScalarView {
+        let scalars = self.unicodeScalars
+        let isNonWhitespace: (Unicode.Scalar) -> Bool = { !$0._isASCIIWhitespace }
+        guard let firstIndexOfNonWhitespace = scalars.firstIndex(where: isNonWhitespace),
+              let lastIndexOfNonWhitespace = scalars.lastIndex(where: isNonWhitespace) else {
+            return Substring.UnicodeScalarView()
+        }
+        return scalars[firstIndexOfNonWhitespace...lastIndexOfNonWhitespace]
+    }
+}
+
+/// A type that holds a `Unicode.Scalar` where its value is compared case-insensitively with others'
+/// _if the value is within ASCII range_.
+private struct ASCIICaseInsensitiveUnicodeScalar: Equatable,
+                                                  ExpressibleByUnicodeScalarLiteral {
+    typealias UnicodeScalarLiteralType = Unicode.Scalar.UnicodeScalarLiteralType
+
+    let scalar: Unicode.Scalar
+
+    @inlinable
+    init(_ scalar: Unicode.Scalar) {
+        assert(scalar.isASCII)
+        self.scalar = scalar
+    }
+
+    init(unicodeScalarLiteral value: Unicode.Scalar.UnicodeScalarLiteralType) {
+        self.init(Unicode.Scalar(unicodeScalarLiteral: value))
+    }
+
+    @inlinable
+    static func ==(
+        lhs: ASCIICaseInsensitiveUnicodeScalar,
+        rhs: ASCIICaseInsensitiveUnicodeScalar
+    ) -> Bool {
+        if lhs.scalar == rhs.scalar {
+            return true
+        } else if ("A"..."Z").contains(lhs.scalar) {
+            return lhs.scalar.value + 0x20 == rhs.scalar.value // "A"..."Z" + 0x20 is "a"..."z".
+        } else if ("a"..."z").contains(lhs.scalar) {
+            return lhs.scalar.value - 0x20 == rhs.scalar.value // "a"..."z" - 0x20 is "A"..."Z".
+        }
+        return false
+    }
+}
+
+/// A type to tokenize string for `String.Encoding` names.
+internal protocol StringEncodingNameTokenizer: ~Copyable {
+    associatedtype Token: Equatable
+    init(name: String)
+    mutating func nextToken() throws -> Token?
+}
+
+extension StringEncodingNameTokenizer where Self: ~Copyable {
+    mutating func hasEqualTokens(with other: consuming Self) throws -> Bool {
+        while let myToken = try self.nextToken() {
+            guard let otherToken = try other.nextToken(),
+                  myToken == otherToken else {
+                return false
+            }
+        }
+        return try other.nextToken() == nil // `other` must be exhausted as well.
+    }
+}
+
+
+/// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s.
+private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {
+    typealias Token = ASCIICaseInsensitiveUnicodeScalar
+
+    enum Error: Swift.Error {
+        case nonASCII
+    }
+
+    let scalars: Substring.UnicodeScalarView
+
+    var _currentIndex: Substring.UnicodeScalarView.Index
+
+    init(name: String) {
+        self.scalars = name._trimmed
+        self._currentIndex = scalars.startIndex
+    }
+
+    mutating func nextToken() throws -> Token? {
+        guard _currentIndex < scalars.endIndex else {
+            return nil
+        }
+        let scalar = scalars[_currentIndex]
+        guard scalar.isASCII else { throw Error.nonASCII }
+        defer {
+            scalars.formIndex(after: &_currentIndex)
+        }
+        return ASCIICaseInsensitiveUnicodeScalar(scalar)
+    }
+}
+
+
+private extension String {
+    func isEqual<T>(
+        to other: String,
+        tokenizedBy tokenizer: T.Type
+    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
+        do {
+            var myTokenizer = T(name: self)
+            let otherTokenizer = T(name: other)
+            return try myTokenizer.hasEqualTokens(with: otherTokenizer)
+        } catch {
+            // Any errors imply that `self` or `other` contains invalid characters.
+            return false
+        }
+    }
+}
+
+
+// MARK: - IANA Charset Names
+
+/// Info about IANA Charset.
+internal struct IANACharset {
+    /// Preferred MIME Name
+    let preferredMIMEName: String?
+
+    /// The name of this charset
+    let name: String
+
+    /// The aliases of this charset
+    let aliases: Array<String>
+
+    var representativeName: String {
+        return preferredMIMEName ?? name
+    }
+
+    init(preferredMIMEName: String?, name: String, aliases: Array<String>) {
+        self.preferredMIMEName = preferredMIMEName
+        self.name = name
+        self.aliases = aliases
+    }
+
+    func matches<T>(
+        _ string: String,
+        tokenizedBy tokenizer: T.Type
+    ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable {
+        if let preferredMIMEName = self.preferredMIMEName,
+           preferredMIMEName.isEqual(to: string, tokenizedBy: tokenizer) {
+            return true
+        }
+        if name.isEqual(to: string, tokenizedBy: tokenizer) {
+            return true
+        }
+        for alias in aliases {
+            if alias.isEqual(to: string, tokenizedBy: tokenizer) {
+                return true
+            }
+        }
+        return false
+    }
+}
+
+
+// MARK: - `String.Encoding` Names
+
+extension String.Encoding {
+    private var _ianaCharset: IANACharset? {
+        switch self {
+        case .utf8: .utf8
+        case .ascii: .usASCII
+        case .japaneseEUC: .eucJP
+        case .isoLatin1: .iso8859_1
+        case .shiftJIS: .shiftJIS
+        case .isoLatin2: .iso8859_2
+        case .unicode: .utf16
+        case .windowsCP1251: .windows1251
+        case .windowsCP1252: .windows1252
+        case .windowsCP1253: .windows1253
+        case .windowsCP1254: .windows1254
+        case .windowsCP1250: .windows1250
+        case .iso2022JP: .iso2022JP
+        case .macOSRoman: .macintosh
+        case .utf16BigEndian: .utf16BE
+        case .utf16LittleEndian: .utf16LE
+        case .utf32: .utf32
+        case .utf32BigEndian: .utf32BE
+        case .utf32LittleEndian: .utf32LE
+        default: nil
+        }
+    }
+
+    /// The name of this encoding that is compatible with the one of the IANA registry "charset".
+    @available(FoundationPreview 6.3, *)
+    public var ianaName: String? {
+        return _ianaCharset?.representativeName
+    }
+
+    /// Creates an instance from the name of the IANA registry "charset".
+    ///
+    /// - Note: The given name is compared to each IANA "charset" name
+    ///         with ASCII case-insensitive collation
+    ///         to determine which encoding is suitable.
+    @available(FoundationPreview 6.3, *)
+    public init?(ianaName charsetName: String) {
+        func __determineEncoding() -> String.Encoding? {
+            func __matches(_ charsets: IANACharset...) -> Bool {
+                assert(!charsets.isEmpty)
+                return charsets.contains {
+                    $0.matches(
+                        charsetName,
+                        tokenizedBy: ASCIICaseInsensitiveTokenizer.self
+                    )
+                }
+            }
+
+            return if __matches(.utf8) {
+                .utf8
+            } else if __matches(.usASCII) {
+                .ascii
+            } else if __matches(.eucJP) {
+                .japaneseEUC
+            } else if __matches(.iso8859_1) {
+                .isoLatin1
+            } else if __matches(.shiftJIS) {
+                .shiftJIS
+            } else if __matches(.iso8859_2) {
+                .isoLatin2
+            } else if __matches(.utf16) {
+                .utf16
+            } else if __matches(.windows1251) {
+                .windowsCP1251
+            } else if __matches(.windows1252) {
+                .windowsCP1252
+            } else if __matches(.windows1253) {
+                .windowsCP1253
+            } else if __matches(.windows1254) {
+                .windowsCP1254
+            } else if __matches(.windows1250) {
+                .windowsCP1250
+            } else if __matches(.iso2022JP) {
+                .iso2022JP
+            } else if __matches(.macintosh) {
+                .macOSRoman
+            } else if __matches(.utf16BE) {
+                .utf16BigEndian
+            } else if __matches(.utf16LE) {
+                .utf16LittleEndian
+            } else if __matches(.utf32) {
+                .utf32
+            } else if __matches(.utf32BE) {
+                .utf32BigEndian
+            } else if __matches(.utf32LE) {
+                .utf32LittleEndian
+            } else {
+                nil
+            }
+        }
+
+        guard let encoding = __determineEncoding() else {
+            return nil
+        }
+        self = encoding
+    }
+}
+
diff --git a/Tests/FoundationEssentialsTests/StringTests.swift b/Tests/FoundationEssentialsTests/StringTests.swift
index 26286be15..6a6781874 100644
--- a/Tests/FoundationEssentialsTests/StringTests.swift
+++ b/Tests/FoundationEssentialsTests/StringTests.swift
@@ -1397,6 +1397,69 @@ private struct StringTests {
             "abcd🎺efgh"
         ])
     }
+
+    @Test func encodingNames() {
+        // Encoding to Name
+        #expect(String.Encoding.ascii.ianaName == "US-ASCII")
+        #expect(String.Encoding.nextstep.ianaName == nil)
+        #expect(String.Encoding.japaneseEUC.ianaName == "EUC-JP")
+        #expect(String.Encoding.utf8.ianaName == "UTF-8")
+        #expect(String.Encoding.isoLatin1.ianaName == "ISO-8859-1")
+        #expect(String.Encoding.symbol.ianaName == nil)
+        #expect(String.Encoding.nonLossyASCII.ianaName == nil)
+        #expect(String.Encoding.shiftJIS.ianaName == "Shift_JIS")
+        #expect(String.Encoding.isoLatin2.ianaName == "ISO-8859-2")
+        #expect(String.Encoding.unicode.ianaName == "UTF-16")
+        #expect(String.Encoding.windowsCP1251.ianaName == "windows-1251")
+        #expect(String.Encoding.windowsCP1252.ianaName == "windows-1252")
+        #expect(String.Encoding.windowsCP1253.ianaName == "windows-1253")
+        #expect(String.Encoding.windowsCP1254.ianaName == "windows-1254")
+        #expect(String.Encoding.windowsCP1250.ianaName == "windows-1250")
+        #expect(String.Encoding.iso2022JP.ianaName == "ISO-2022-JP")
+        #expect(String.Encoding.macOSRoman.ianaName == "macintosh")
+        #expect(String.Encoding.utf16BigEndian.ianaName == "UTF-16BE")
+        #expect(String.Encoding.utf16LittleEndian.ianaName == "UTF-16LE")
+        #expect(String.Encoding.utf32.ianaName == "UTF-32")
+        #expect(String.Encoding.utf32BigEndian.ianaName == "UTF-32BE")
+        #expect(String.Encoding.utf32LittleEndian.ianaName == "UTF-32LE")
+        #expect(String.Encoding(rawValue: .max).ianaName == nil)
+
+        // Name to Encoding
+        #expect(String.Encoding(ianaName: "us-ascii") == .ascii)
+        #expect(String.Encoding(ianaName: "iso-ir-2") == nil)
+        #expect(String.Encoding(ianaName: "x-nextstep") == nil)
+        #expect(String.Encoding(ianaName: "euc-jp") == .japaneseEUC)
+        #expect(String.Encoding(ianaName: "CP51932") == nil)
+        #expect(String.Encoding(ianaName: "utf-8") == .utf8)
+        #expect(String.Encoding(ianaName: "iso_8859-1") == .isoLatin1)
+        #expect(String.Encoding(ianaName: "x-mac-symbol") == nil)
+        #expect(String.Encoding(ianaName: "Adobe-symbol-encoding") == nil)
+        #expect(String.Encoding(ianaName: "cp932") == nil)
+        #expect(String.Encoding(ianaName: "shift_jis") == .shiftJIS)
+        #expect(String.Encoding(ianaName: "windows-31j") == nil)
+        #expect(String.Encoding(ianaName: "iso_8859-2") == .isoLatin2)
+        #expect(String.Encoding(ianaName: "utf-16") == .utf16)
+        #expect(String.Encoding(ianaName: "iso-10646-ucs-2") == nil)
+        #expect(String.Encoding(ianaName: "unicode-1-1") == nil)
+        #expect(String.Encoding(ianaName: "windows-1251") == .windowsCP1251)
+        #expect(String.Encoding(ianaName: "windows-1252") == .windowsCP1252)
+        #expect(String.Encoding(ianaName: "ISO-8859-1-Windows-3.0-Latin-1") == nil)
+        #expect(String.Encoding(ianaName: "ISO-8859-1-Windows-3.1-Latin-1") == nil)
+        #expect(String.Encoding(ianaName: "windows-1253") == .windowsCP1253)
+        #expect(String.Encoding(ianaName: "windows-1254") == .windowsCP1254)
+        #expect(String.Encoding(ianaName: "iso-8859-9-windows-Latin-5") == nil)
+        #expect(String.Encoding(ianaName: "windows-1250") == .windowsCP1250)
+        #expect(String.Encoding(ianaName: "iso-8859-2-windows-Latin-2") == nil)
+        #expect(String.Encoding(ianaName: "iso-2022-jp") == .iso2022JP)
+        #expect(String.Encoding(ianaName: "macintosh") == .macOSRoman)
+        #expect(String.Encoding(ianaName: "utf-16be") == .utf16BigEndian)
+        #expect(String.Encoding(ianaName: "utf-16le") == .utf16LittleEndian)
+        #expect(String.Encoding(ianaName: "utf-32") == .utf32)
+        #expect(String.Encoding(ianaName: "iso-10646-ucs-4") == nil)
+        #expect(String.Encoding(ianaName: "utf-32be") == .utf32BigEndian)
+        #expect(String.Encoding(ianaName: "utf-32le") == .utf32LittleEndian)
+        #expect(String.Encoding(ianaName: "foo-bar-baz") == nil)
+    }
 }
 
 // MARK: - Helper functions
diff --git a/utils/update-iana-charset-names b/utils/update-iana-charset-names
new file mode 100755
index 000000000..23d9a2ef8
--- /dev/null
+++ b/utils/update-iana-charset-names
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+##===----------------------------------------------------------------------===##
+##
+## This source file is part of the Swift.org open source project
+##
+## Copyright (c) 2025 Apple Inc. and the Swift project authors
+## Licensed under Apache License v2.0 with Runtime Library Exception
+##
+## See https://swift.org/LICENSE.txt for license information
+## See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+##
+##===----------------------------------------------------------------------===##
+
+# This is a shell script that generates a Swift source code file which contains
+# the list of IANA "Character Sets".
+
+set -eu
+
+declare -r commandName="$(basename "$0")"
+declare -r utilsDir="$(cd "$(dirname "$0")" && pwd)"
+declare -r foundationRepoDir="$(cd "${utilsDir}/.." && pwd)"
+declare -r targetSwiftFileRelativePath="Sources/FoundationEssentials/String/IANACharsetNames.swift"
+
+declare -r copyrightYear=$(
+  currentYear=$(date +%Y)
+  if [[ $currentYear -eq 2025 ]]; then
+    echo 2025
+  else
+    echo "2025-${currentYear}"
+  fi
+)
+declare -r swiftLicenseHeader="
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) ${copyrightYear} Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+"
+declare -r warningComment="
+// WARNING: DO NOT EDIT THIS FILE DIRECTLY.
+// This is auto-generated by \`${commandName}\`.
+
+"
+
+echo "Generating Swift source code..." 1>&2
+declare generatedCode
+generatedCode=$(
+  echo "${swiftLicenseHeader##$'\n'}"
+  echo "$warningComment"
+  python3 "${utilsDir}/${commandName}-impl.py"
+)
+
+echo "Writing the code to '${targetSwiftFileRelativePath}'..." 1>&2
+echo "$generatedCode" >"${foundationRepoDir}/${targetSwiftFileRelativePath}"
+
+echo "Done." 1>&2
diff --git a/utils/update-iana-charset-names-impl.py b/utils/update-iana-charset-names-impl.py
new file mode 100644
index 000000000..b323cf206
--- /dev/null
+++ b/utils/update-iana-charset-names-impl.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+##===----------------------------------------------------------------------===##
+##
+## This source file is part of the Swift.org open source project
+##
+## Copyright (c) 2025 Apple Inc. and the Swift project authors
+## Licensed under Apache License v2.0 with Runtime Library Exception
+##
+## See https://swift.org/LICENSE.txt for license information
+## See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+##
+##===----------------------------------------------------------------------===##
+
+"""
+This is a python script that converts an XML file containing the list of IANA
+"Character Sets" to Swift source code.
+This script generates minimum code and is intended to be executed by another shell
+script.
+"""
+
+import re
+import urllib.request as request
+import xml.etree.ElementTree as ElemTree
+from typing import List, Optional
+
+REQUIRED_CHARSET_NAMES: List[str] = [
+    "UTF-8",
+    "US-ASCII",
+    "EUC-JP",
+    "ISO-8859-1",
+    "Shift_JIS",
+    "ISO-8859-2",
+    "UTF-16",
+    "windows-1251",
+    "windows-1252",
+    "windows-1253",
+    "windows-1254",
+    "windows-1250",
+    "ISO-2022-JP",
+    "macintosh",
+    "UTF-16BE",
+    "UTF-16LE",
+    "UTF-32",
+    "UTF-32BE",
+    "UTF-32LE",
+]
+CHARSETS_XML_URL = "https://www.iana.org/assignments/character-sets/character-sets.xml"
+CHARSETS_XML_STRING = request.urlopen(request.Request(CHARSETS_XML_URL)).read()
+CHARSETS_XML_ROOT = ElemTree.fromstring(CHARSETS_XML_STRING)
+CHARSETS_XML_NS = "http://www.iana.org/assignments"
+CHARSETS_XML_RECORD_ELEMENTS = CHARSETS_XML_ROOT.findall(
+    "./{%s}registry/{%s}record" % (CHARSETS_XML_NS, CHARSETS_XML_NS)
+)
+SWIFT_CODE_INDENT = "    "
+
+
+class IANACharsetNameRecord:
+    """Representation of <record> element in 'character-sets.xml'
+
+    The structure of <record> element is as below:
+    <record>
+      <name>US-ASCII</name>
+      <xref type="rfc" data="rfc2046"/>
+      <value>3</value>
+      <description>ANSI X3.4-1986</description>
+      <alias>iso-ir-6</alias>
+      <alias>ANSI_X3.4-1968</alias>
+      <alias>ANSI_X3.4-1986</alias>
+      <alias>ISO_646.irv:1991</alias>
+      <alias>ISO646-US</alias>
+      <alias>US-ASCII</alias>
+      <alias>us</alias>
+      <alias>IBM367</alias>
+      <alias>cp367</alias>
+      <alias>csASCII</alias>
+      <preferred_alias>US-ASCII</preferred_alias>
+    </record>
+    """
+
+    def __init__(self, recordElem: ElemTree.Element):
+        self._name: str = recordElem.find('./{%s}name' % (CHARSETS_XML_NS)).text
+        self._preferredMIMEName: Optional[str] = getattr(
+            recordElem.find('./{%s}preferred_alias' % (CHARSETS_XML_NS)),
+            'text',
+            None
+        )
+        self._aliases: List[str] = list(map(
+            lambda aliasElem: aliasElem.text,
+            recordElem.findall('./{%s}alias' % (CHARSETS_XML_NS))
+        ))
+        self._camelCasedName = None
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def preferredMIMEName(self) -> Optional[str]:
+        return self._preferredMIMEName
+
+    @property
+    def representativeName(self) -> str:
+        return self.preferredMIMEName or self.name
+
+    @property
+    def aliases(self) -> List[str]:
+        return self._aliases
+
+    @property
+    def camelCasedName(self) -> str:
+        if (self._camelCasedName is not None):
+            return self._camelCasedName
+
+        camelCasedName = ""
+        previousWord = None
+        for word in re.split(r"[^0-9A-Za-z]", self.representativeName):
+            if previousWord is None:
+                camelCasedName = word.lower()
+            else:
+                if re.search(r"[0-9]$", previousWord) and re.search(r"^[0-9]", word):
+                    camelCasedName += "_"
+
+                if (re.fullmatch("[0-9]*[A-Z]+", word)):
+                    camelCasedName += word
+                else:
+                    camelCasedName += word.capitalize()
+
+            previousWord = word
+
+        self._camelCasedName = camelCasedName
+        return camelCasedName
+
+    @property
+    def swiftCodeLines(self) -> List[str]:
+        def __stringLiteralOrNil(string: Optional[str]) -> str:
+            if (string is None):
+                return 'nil'
+            return f'"{string}"'
+
+        lines: List[str] = []
+        lines.append(f"/// IANA Charset `{self.representativeName}`.")
+        lines.append(f"static let {self.camelCasedName} = IANACharset(")
+        preferredMIMENameLiteral = __stringLiteralOrNil(self.preferredMIMEName)
+        # NOTE: Kept out of the f-string; multi-line fields need Python 3.12+.
+        lines.append(f"{SWIFT_CODE_INDENT}preferredMIMEName: {preferredMIMENameLiteral},")
+        lines.append(f'{SWIFT_CODE_INDENT}name: "{self.name}",')
+        lines.append(f"{SWIFT_CODE_INDENT}aliases: [")
+        for alias in self.aliases:
+            lines.append(f"{SWIFT_CODE_INDENT * 2}\"{alias}\",")
+        lines.append(f"{SWIFT_CODE_INDENT}]")
+        lines.append(")")
+        return lines
+
+
+def generateSwiftCode() -> str:
+    result = "extension IANACharset {"
+    for record in map(
+        lambda recordElem: IANACharsetNameRecord(recordElem),
+        CHARSETS_XML_RECORD_ELEMENTS
+    ):
+        if (record.representativeName not in REQUIRED_CHARSET_NAMES):
+            continue
+        result += "\n"
+        result += "\n".join(map(
+            lambda line: SWIFT_CODE_INDENT + line,
+            record.swiftCodeLines
+        ))
+        result += "\n"
+    result += "}\n"
+    return result
+
+
+if __name__ == "__main__":
+    print(generateSwiftCode())