Skip to content

Commit fbba2c2

Browse files
committed
internal/httpsfv: add support for consuming Display String and Date type
This CL adds the consumeDisplayString() and consumeDate() functions, meaning that we can now consume all types that are defined within RFC 9651. In a future CL, we will add the corresponding parsing functions for all the types, so callers of this package will not have to implement their own parsing / formatting. For golang/go#75500 Change-Id: I90aa132d3ab1385b310d821997da13a095cd71bc Reviewed-on: https://go-review.googlesource.com/c/net/+/708015 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Damien Neil <[email protected]> Reviewed-by: Nicholas Husin <[email protected]>
1 parent 47a241f commit fbba2c2

File tree

2 files changed

+272
-3
lines changed

2 files changed

+272
-3
lines changed

internal/httpsfv/httpsfv.go

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package httpsfv
88

99
import (
1010
"slices"
11+
"unicode/utf8"
1112
)
1213

1314
func isLCAlpha(b byte) bool {
@@ -48,6 +49,27 @@ func countLeftWhitespace(s string) int {
4849
return i
4950
}
5051

52+
// https://www.rfc-editor.org/rfc/rfc4648#section-8.
53+
func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
54+
decBase16 := func(in byte) (out byte, ok bool) {
55+
if !isDigit(in) && !(in >= 'a' && in <= 'f') {
56+
return 0, false
57+
}
58+
if isDigit(in) {
59+
return in - '0', true
60+
}
61+
return in - 'a' + 10, true
62+
}
63+
64+
if ch1, ok = decBase16(ch1); !ok {
65+
return 0, ok
66+
}
67+
if ch2, ok = decBase16(ch2); !ok {
68+
return 0, ok
69+
}
70+
return ch1<<4 | ch2, true
71+
}
72+
5173
// TODO(nsh): Implement corresponding parse functions for all consume functions
5274
// that exist.
5375

@@ -409,14 +431,85 @@ func consumeBoolean(s string) (consumed, rest string, ok bool) {
409431
return "", s, false
410432
}
411433

434+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
435+
func consumeDate(s string) (consumed, rest string, ok bool) {
436+
if len(s) == 0 || s[0] != '@' {
437+
return "", s, false
438+
}
439+
if _, rest, ok = consumeIntegerOrDecimal(s[1:]); !ok {
440+
return "", s, ok
441+
}
442+
consumed = s[:len(s)-len(rest)]
443+
if slices.Contains([]byte(consumed), '.') {
444+
return "", s, false
445+
}
446+
return consumed, rest, ok
447+
}
448+
449+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
450+
func consumeDisplayString(s string) (consumed, rest string, ok bool) {
451+
// To prevent excessive allocation, especially when input is large, we
452+
// maintain a buffer of 4 bytes to keep track of the last rune we
453+
// encounter. This way, we can validate that the display string conforms to
454+
// UTF-8 without actually building the whole string.
455+
var lastRune [4]byte
456+
var runeLen int
457+
isPartOfValidRune := func(ch byte) bool {
458+
lastRune[runeLen] = ch
459+
runeLen++
460+
if utf8.FullRune(lastRune[:runeLen]) {
461+
r, s := utf8.DecodeRune(lastRune[:runeLen])
462+
if r == utf8.RuneError {
463+
return false
464+
}
465+
copy(lastRune[:], lastRune[s:runeLen])
466+
runeLen -= s
467+
return true
468+
}
469+
return runeLen <= 4
470+
}
471+
472+
if len(s) <= 1 || s[:2] != `%"` {
473+
return "", s, false
474+
}
475+
i := 2
476+
for i < len(s) {
477+
ch := s[i]
478+
if !isVChar(ch) && !isSP(ch) {
479+
return "", s, false
480+
}
481+
switch ch {
482+
case '"':
483+
if runeLen > 0 {
484+
return "", s, false
485+
}
486+
return s[:i+1], s[i+1:], true
487+
case '%':
488+
if i+2 >= len(s) {
489+
return "", s, false
490+
}
491+
if ch, ok = decOctetHex(s[i+1], s[i+2]); !ok {
492+
return "", s, ok
493+
}
494+
if ok = isPartOfValidRune(ch); !ok {
495+
return "", s, ok
496+
}
497+
i += 3
498+
default:
499+
if ok = isPartOfValidRune(ch); !ok {
500+
return "", s, ok
501+
}
502+
i++
503+
}
504+
}
505+
return "", s, false
506+
}
507+
412508
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
413509
func consumeBareItem(s string) (consumed, rest string, ok bool) {
414510
if len(s) == 0 {
415511
return "", s, false
416512
}
417-
418-
// TODO(nsh): This is currently only up to date with RFC 8941. Implement
419-
// Date and Display string for full feature parity with RFC 9651.
420513
ch := s[0]
421514
switch {
422515
case ch == '-' || isDigit(ch):
@@ -429,6 +522,10 @@ func consumeBareItem(s string) (consumed, rest string, ok bool) {
429522
return consumeByteSequence(s)
430523
case ch == '?':
431524
return consumeBoolean(s)
525+
case ch == '@':
526+
return consumeDate(s)
527+
case ch == '%':
528+
return consumeDisplayString(s)
432529
default:
433530
return "", s, false
434531
}

internal/httpsfv/httpsfv_test.go

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,3 +832,175 @@ func TestConsumeBoolean(t *testing.T) {
832832
}
833833
}
834834
}
835+
836+
func TestConsumeDate(t *testing.T) {
837+
tests := []struct {
838+
name string
839+
in string
840+
want string
841+
wantOk bool
842+
}{
843+
{
844+
name: "valid zero date",
845+
in: "@0",
846+
want: "@0",
847+
wantOk: true,
848+
},
849+
{
850+
name: "valid positive date",
851+
in: "@1659578233",
852+
want: "@1659578233",
853+
wantOk: true,
854+
},
855+
{
856+
name: "valid negative date",
857+
in: "@-1659578233",
858+
want: "@-1659578233",
859+
wantOk: true,
860+
},
861+
{
862+
name: "valid large date",
863+
in: "@25340221440",
864+
want: "@25340221440",
865+
wantOk: true,
866+
},
867+
{
868+
name: "valid small date",
869+
in: "@-62135596800",
870+
want: "@-62135596800",
871+
wantOk: true,
872+
},
873+
{
874+
name: "invalid decimal date",
875+
in: "@1.2",
876+
},
877+
{
878+
name: "valid date with more content after",
879+
in: "@1659578233, foo;bar",
880+
want: "@1659578233",
881+
wantOk: true,
882+
},
883+
}
884+
885+
for _, tc := range tests {
886+
got, gotRest, ok := consumeDate(tc.in)
887+
if ok != tc.wantOk {
888+
t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
889+
}
890+
if tc.want != got {
891+
t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
892+
}
893+
if got+gotRest != tc.in {
894+
t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
895+
}
896+
}
897+
}
898+
899+
func TestConsumeDisplayString(t *testing.T) {
900+
tests := []struct {
901+
name string
902+
in string
903+
want string
904+
wantOk bool
905+
}{
906+
{
907+
name: "valid ascii string",
908+
in: "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
909+
want: "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
910+
wantOk: true,
911+
},
912+
{
913+
name: "valid lowercase non-ascii string",
914+
in: `%"f%c3%bc%c3%bc"`,
915+
want: `%"f%c3%bc%c3%bc"`,
916+
wantOk: true,
917+
},
918+
{
919+
name: "invalid uppercase non-ascii string",
920+
in: `%"f%C3%BC%C3%BC"`,
921+
},
922+
{
923+
name: "invalid unqouted string",
924+
in: "%foo",
925+
},
926+
{
927+
name: "invalid string missing initial quote",
928+
in: `%foo"`,
929+
},
930+
{
931+
name: "invalid string missing closing quote",
932+
in: `%"foo`,
933+
},
934+
{
935+
name: "invalid tab in string",
936+
in: "%\"\t\"",
937+
},
938+
{
939+
name: "invalid newline in string",
940+
in: "%\"\n\"",
941+
},
942+
{
943+
name: "invalid single quoted string",
944+
in: `%'foo'`,
945+
},
946+
{
947+
name: "invalid string bad escaping",
948+
in: `%\"foo %a"`,
949+
},
950+
{
951+
name: "valid string with escaped quotes",
952+
in: `%"foo %22bar%22 \\ baz"`,
953+
want: `%"foo %22bar%22 \\ baz"`,
954+
wantOk: true,
955+
},
956+
{
957+
name: "invalid sequence id utf-8 string",
958+
in: `%"%a0%a1"`,
959+
},
960+
{
961+
name: "invalid 2 bytes sequence utf-8 string",
962+
in: `%"%c3%28"`,
963+
},
964+
{
965+
name: "invalid 3 bytes sequence utf-8 string",
966+
in: `%"%e2%28%a1"`,
967+
},
968+
{
969+
name: "invalid 4 bytes sequence utf-8 string",
970+
in: `%"%f0%28%8c%28"`,
971+
},
972+
{
973+
name: "invalid hex utf-8 string",
974+
in: `%"%g0%1w"`,
975+
},
976+
{
977+
name: "valid byte order mark in display string",
978+
in: `%"BOM: %ef%bb%bf"`,
979+
want: `%"BOM: %ef%bb%bf"`,
980+
wantOk: true,
981+
},
982+
{
983+
name: "valid string with content after",
984+
in: `%"foo\nbar", foo;bar`,
985+
want: `%"foo\nbar"`,
986+
wantOk: true,
987+
},
988+
{
989+
name: "invalid unfinished 4 bytes rune",
990+
in: `%"%f0%9f%98"`,
991+
},
992+
}
993+
994+
for _, tc := range tests {
995+
got, gotRest, ok := consumeDisplayString(tc.in)
996+
if ok != tc.wantOk {
997+
t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
998+
}
999+
if tc.want != got {
1000+
t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
1001+
}
1002+
if got+gotRest != tc.in {
1003+
t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
1004+
}
1005+
}
1006+
}

0 commit comments

Comments
 (0)