1- /* auto-generated on 2023-04-20 18:39:35 -0400. Do not edit! */
1+ /* auto-generated on 2023-04-26 16:43:37 -0400. Do not edit! */
22/* begin file src/ada.cpp */
33#include "ada.h"
44/* begin file src/checkers.cpp */
@@ -116,12 +116,13 @@ ada_really_inline constexpr bool verify_dns_length(
116116
117117ADA_PUSH_DISABLE_ALL_WARNINGS
118118/* begin file src/ada_idna.cpp */
119- /* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */
119+ /* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! */
120120/* begin file src/idna.cpp */
121121/* begin file src/unicode_transcoding.cpp */
122122
123123#include <cstdint>
124124#include <cstring>
125+
125126namespace ada::idna {
126127
127128size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
@@ -2750,7 +2751,9 @@ uint32_t find_range_index(uint32_t key) {
27502751}
27512752
27522753bool ascii_has_upper_case(char* input, size_t length) {
2753- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2754+ auto broadcast = [](uint8_t v) -> uint64_t {
2755+ return 0x101010101010101ull * v;
2756+ };
27542757 uint64_t broadcast_80 = broadcast(0x80);
27552758 uint64_t broadcast_Ap = broadcast(128 - 'A');
27562759 uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -2772,7 +2775,9 @@ bool ascii_has_upper_case(char* input, size_t length) {
27722775}
27732776
27742777void ascii_map(char* input, size_t length) {
2775- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2778+ auto broadcast = [](uint8_t v) -> uint64_t {
2779+ return 0x101010101010101ull * v;
2780+ };
27762781 uint64_t broadcast_80 = broadcast(0x80);
27772782 uint64_t broadcast_Ap = broadcast(128 - 'A');
27782783 uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -7999,9 +8004,10 @@ const char32_t uninorms::decomposition_data[] = {
79998004namespace ada::idna {
80008005
80018006void normalize(std::u32string& input) {
8002- // [Normalize](https://www.unicode.org/reports/tr46/#ProcessingStepNormalize).
8003- // Normalize
8004- // the domain_name string to Unicode Normalization Form C.
8007+ /**
8008+ * Normalize the domain_name string to Unicode Normalization Form C.
8009+ * @see https://www.unicode.org/reports/tr46/#ProcessingStepNormalize
8010+ */
80058011 ufal::unilib::uninorms::nfc(input);
80068012}
80078013
@@ -8229,7 +8235,6 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
82298235} // namespace ada::idna
82308236/* end file src/punycode.cpp */
82318237/* begin file src/validity.cpp */
8232-
82338238#include <algorithm>
82348239#include <string_view>
82358240
@@ -9617,18 +9622,18 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
96179622
96189623static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
96199624
9620- inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept {
9625+ inline bool is_forbidden_domain_code_point(const char c) noexcept {
96219626 return is_forbidden_domain_code_point_table[uint8_t(c)];
96229627}
96239628
9624- // We return "" on error. For now.
9625- std::string from_ascii_to_ascii(std::string_view ut8_string) {
9626- static const std::string error = "";
9627- if (std::any_of(ut8_string.begin(), ut8_string.end(),
9628- is_forbidden_domain_code_point)) {
9629- return error;
9630- }
9629+ bool contains_forbidden_domain_code_point(std::string_view view) {
9630+ return (
9631+ std::any_of(view.begin(), view.end(), is_forbidden_domain_code_point));
9632+ }
96319633
9634+ // We return "" on error.
9635+ static std::string from_ascii_to_ascii(std::string_view ut8_string) {
9636+ static const std::string error = "";
96329637 // copy and map
96339638 // we could be more efficient by avoiding the copy when unnecessary.
96349639 std::string mapped_string = std::string(ut8_string);
@@ -9682,7 +9687,7 @@ std::string from_ascii_to_ascii(std::string_view ut8_string) {
96829687 return out;
96839688}
96849689
9685- // We return "" on error. For now.
9690+ // We return "" on error.
96869691std::string to_ascii(std::string_view ut8_string) {
96879692 if (is_ascii(ut8_string)) {
96889693 return from_ascii_to_ascii(ut8_string);
@@ -9769,11 +9774,6 @@ std::string to_ascii(std::string_view ut8_string) {
97699774 out.push_back('.');
97709775 }
97719776 }
9772-
9773- if (std::any_of(out.begin(), out.end(), is_forbidden_domain_code_point)) {
9774- return error;
9775- }
9776-
97779777 return out;
97789778}
97799779} // namespace ada::idna
@@ -9842,7 +9842,9 @@ ADA_POP_DISABLE_WARNINGS
98429842namespace ada::unicode {
98439843
98449844constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
9845- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
9845+ auto broadcast = [](uint8_t v) -> uint64_t {
9846+ return 0x101010101010101ull * v;
9847+ };
98469848 uint64_t broadcast_80 = broadcast(0x80);
98479849 uint64_t broadcast_Ap = broadcast(128 - 'A');
98489850 uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -9873,7 +9875,9 @@ ada_really_inline constexpr bool has_tabs_or_newline(
98739875 auto has_zero_byte = [](uint64_t v) {
98749876 return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
98759877 };
9876- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
9878+ auto broadcast = [](uint8_t v) -> uint64_t {
9879+ return 0x101010101010101ull * v;
9880+ };
98779881 size_t i = 0;
98789882 uint64_t mask1 = broadcast('\r');
98799883 uint64_t mask2 = broadcast('\n');
@@ -10252,7 +10256,8 @@ bool to_ascii(std::optional<std::string>& out, const std::string_view plain,
1025210256 }
1025310257 // input is a non-empty UTF-8 string, must be percent decoded
1025410258 std::string idna_ascii = ada::idna::to_ascii(input);
10255- if (idna_ascii.empty()) {
10259+ if (idna_ascii.empty() || contains_forbidden_domain_code_point(
10260+ idna_ascii.data(), idna_ascii.size())) {
1025610261 return false;
1025710262 }
1025810263 out = std::move(idna_ascii);
@@ -10627,7 +10632,9 @@ ada_really_inline size_t find_next_host_delimiter_special(
1062710632 auto index_of_first_set_byte = [](uint64_t v) {
1062810633 return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
1062910634 };
10630- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
10635+ auto broadcast = [](uint8_t v) -> uint64_t {
10636+ return 0x101010101010101ull * v;
10637+ };
1063110638 size_t i = location;
1063210639 uint64_t mask1 = broadcast(':');
1063310640 uint64_t mask2 = broadcast('/');
@@ -10690,7 +10697,9 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
1069010697 auto index_of_first_set_byte = [](uint64_t v) {
1069110698 return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
1069210699 };
10693- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
10700+ auto broadcast = [](uint8_t v) -> uint64_t {
10701+ return 0x101010101010101ull * v;
10702+ };
1069410703 size_t i = location;
1069510704 uint64_t mask1 = broadcast(':');
1069610705 uint64_t mask2 = broadcast('/');
@@ -11016,7 +11025,9 @@ find_authority_delimiter_special(std::string_view view) noexcept {
1101611025 auto index_of_first_set_byte = [](uint64_t v) {
1101711026 return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
1101811027 };
11019- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
11028+ auto broadcast = [](uint8_t v) -> uint64_t {
11029+ return 0x101010101010101ull * v;
11030+ };
1102011031 size_t i = 0;
1102111032 uint64_t mask1 = broadcast('@');
1102211033 uint64_t mask2 = broadcast('/');
@@ -11064,7 +11075,9 @@ find_authority_delimiter(std::string_view view) noexcept {
1106411075 auto index_of_first_set_byte = [](uint64_t v) {
1106511076 return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
1106611077 };
11067- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
11078+ auto broadcast = [](uint8_t v) -> uint64_t {
11079+ return 0x101010101010101ull * v;
11080+ };
1106811081 size_t i = 0;
1106911082 uint64_t mask1 = broadcast('@');
1107011083 uint64_t mask2 = broadcast('/');
0 commit comments