Skip to content

Commit c492e3c

Browse files
nicholashusingopherbot
authored andcommitted
internal/httpsfv: add functionality to walk Parameters in HTTP SFV.
This change implements the minimum set of functionality within RFC 8941 that is needed in order for us to be able to extract information out of the Parameters type. Rather than parsing the given Structured Field Values as usual, we instead allow users to give us functions that will be invoked as we walk through the SFV. This allows users to still extract information out of SFV, without incurring significant memory allocation, especially when the input is large. If the current API & approach is good, we will proceed further by implementing walk functionality for the rest of the types within RFC 8941: Dictionary, List, Item, and Inner List. After that, we will also add support for Date and Display String to fully support RFC 9651. For golang/go#75500 Change-Id: I838a7267a54fcd64b019be0ac10fe86b1e3e2c8b Reviewed-on: https://go-review.googlesource.com/c/net/+/706755 Auto-Submit: Nicholas Husin <[email protected]> Reviewed-by: Nicholas Husin <[email protected]> Reviewed-by: Damien Neil <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 1034247 commit c492e3c

File tree

2 files changed

+744
-0
lines changed

2 files changed

+744
-0
lines changed

internal/httpsfv/httpsfv.go

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package httpsfv provides functionality for dealing with HTTP Structured Field
6+
// Values.
7+
package httpsfv
8+
9+
import (
10+
"slices"
11+
)
12+
13+
// isLCAlpha reports whether b is a lowercase ASCII letter ('a' through 'z').
func isLCAlpha(b byte) bool {
	return 'a' <= b && b <= 'z'
}
16+
17+
func isAlpha(b byte) bool {
18+
return isLCAlpha(b) || (b >= 'A' && b <= 'Z')
19+
}
20+
21+
// isDigit reports whether b is an ASCII decimal digit ('0' through '9').
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}
24+
25+
// isVChar reports whether b is a visible (printing) ASCII character, i.e. it
// lies in the inclusive range 0x21 ('!') to 0x7e ('~').
func isVChar(b byte) bool {
	return '!' <= b && b <= '~'
}
28+
29+
// isSP reports whether b is the ASCII space character (0x20).
func isSP(b byte) bool {
	return b == ' '
}
32+
33+
func isTChar(b byte) bool {
34+
if isAlpha(b) || isDigit(b) {
35+
return true
36+
}
37+
return slices.Contains([]byte{'!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~'}, b)
38+
}
39+
40+
// countLeftWhitespace returns the number of leading bytes of s that are
// either a space or a horizontal tab.
func countLeftWhitespace(s string) int {
	n := 0
	for n < len(s) && (s[n] == ' ' || s[n] == '\t') {
		n++
	}
	return n
}
50+
51+
// TODO(nsh): Implement other consume functions that will be needed to fully
52+
// deal with all possible HTTP SFV, specifically:
53+
// - consumeDictionary(s string, f func(key, val, param string)) (consumed, rest string, ok bool)
54+
// For example, given `a=123,b;a="a", i`, ConsumeDictionary will call f() 3 times
55+
// with the following args:
56+
// - key: `a`, val: `123`, param: ``
57+
// - key: `b`, val: ``, param:`;a="a"`
58+
// - key: `i`, val: ``, param: ``
59+
//
60+
// - consumeList(s string, f func(member, param string)) (consumed, rest string, ok bool)
61+
// For example, given `123.456;i, ("foo" "bar"; lvl=2); lvl=1`, ConsumeList will
62+
// call f() 2 times with the following args:
63+
// - member: `123.456`, param: `;i`
64+
// - member: `("foo" "bar"; lvl=2)`, param: `; lvl=1`
65+
//
66+
// - consumeItem(s string, f func(bareItem, param string)) (consumed, rest string, ok bool)
67+
// For example, given `"foo"; bar=baz;foo=bar`, ConsumeItem will call f() with
68+
// the following args:
69+
// - bareItem: `"foo"`, param: `; bar=baz;foo=bar`
70+
//
71+
// - consumeInnerList(s string, f func(bareItem, param, listParam string)) (consumed, rest string, ok bool)
72+
// For example, given `("foo"; a=1;b=2 "bar";baz;lvl=2);lvl=1`, ConsumeInnerList
73+
// will call f() 2 times with the following args:
74+
// - bareItem: `"foo"`, param: `; a=1;b=2`, listParam: `;lvl=1`
75+
// - bareItem: `"bar"`, param: `;baz;lvl=2`, listParam: `;lvl=1`
76+
77+
// TODO(nsh): Implement corresponding parse functions for all consume functions
78+
// that exist.
79+
80+
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
81+
func consumeParameter(s string, f func(key, val string)) (consumed, rest string, ok bool) {
82+
rest = s
83+
for len(rest) != 0 {
84+
var key, val string
85+
val = "?1" // Default value for empty val is boolean true.
86+
if rest[0] != ';' {
87+
break
88+
}
89+
rest = rest[1:]
90+
if i := countLeftWhitespace(rest); i > 0 {
91+
rest = rest[i:]
92+
}
93+
key, rest, ok = consumeKey(rest)
94+
if !ok {
95+
return "", s, ok
96+
}
97+
if len(rest) != 0 && rest[0] == '=' {
98+
rest = rest[1:]
99+
val, rest, ok = consumeBareItem(rest)
100+
if !ok {
101+
return "", s, ok
102+
}
103+
}
104+
if f != nil {
105+
f(key, val)
106+
}
107+
}
108+
return s[:len(s)-len(rest)], rest, true
109+
}
110+
111+
// ParseParameter is used to parse a string that represents a parameter in an
112+
// HTTP Structured Field Values.
113+
//
114+
// Given a string that represents a parameter, it will call the given function
115+
// using each of the keys and values contained in the parameter. This allows
116+
// the caller to extract information out of the parameter.
117+
//
118+
// This function will return once it encounters the end of the string, or
119+
// something that is not a parameter. If it cannot consume the entire given
120+
// string, the ok value returned will be false.
121+
//
122+
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-param.
123+
func ParseParameter(s string, f func(key, val string)) (ok bool) {
124+
_, rest, ok := consumeParameter(s, f)
125+
if rest != "" {
126+
return false
127+
}
128+
return ok
129+
}
130+
131+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-key.
132+
func consumeKey(s string) (consumed, rest string, ok bool) {
133+
if len(s) == 0 || (!isLCAlpha(s[0]) && s[0] != '*') {
134+
return "", s, false
135+
}
136+
i := 0
137+
for _, ch := range []byte(s) {
138+
if !isLCAlpha(ch) && !isDigit(ch) && !slices.Contains([]byte("_-.*"), ch) {
139+
break
140+
}
141+
i++
142+
}
143+
return s[:i], s[i:], true
144+
}
145+
146+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-an-integer-or-decim.
147+
func consumeIntegerOrDecimal(s string) (consumed, rest string, ok bool) {
148+
var i, signOffset, periodIndex int
149+
var isDecimal bool
150+
if i < len(s) && s[i] == '-' {
151+
i++
152+
signOffset++
153+
}
154+
if i >= len(s) {
155+
return "", s, false
156+
}
157+
if !isDigit(s[i]) {
158+
return "", s, false
159+
}
160+
for i < len(s) {
161+
ch := s[i]
162+
if isDigit(ch) {
163+
i++
164+
continue
165+
}
166+
if !isDecimal && ch == '.' {
167+
if i-signOffset > 12 {
168+
return "", s, false
169+
}
170+
periodIndex = i
171+
isDecimal = true
172+
i++
173+
continue
174+
}
175+
break
176+
}
177+
if !isDecimal && i-signOffset > 15 {
178+
return "", s, false
179+
}
180+
if isDecimal {
181+
if i-signOffset > 16 {
182+
return "", s, false
183+
}
184+
if s[i-1] == '.' {
185+
return "", s, false
186+
}
187+
if i-periodIndex-1 > 3 {
188+
return "", s, false
189+
}
190+
}
191+
return s[:i], s[i:], true
192+
}
193+
194+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-string.
195+
func consumeString(s string) (consumed, rest string, ok bool) {
196+
if len(s) == 0 || s[0] != '"' {
197+
return "", s, false
198+
}
199+
200+
for i := 1; i < len(s); i++ {
201+
switch ch := s[i]; ch {
202+
case '\\':
203+
if i+1 >= len(s) {
204+
return "", s, false
205+
}
206+
i++
207+
if ch = s[i]; ch != '"' && ch != '\\' {
208+
return "", s, false
209+
}
210+
case '"':
211+
return s[:i+1], s[i+1:], true
212+
default:
213+
if !isVChar(ch) && !isSP(ch) {
214+
return "", s, false
215+
}
216+
}
217+
}
218+
219+
return "", s, false
220+
}
221+
222+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-token
223+
func consumeToken(s string) (consumed, rest string, ok bool) {
224+
if len(s) == 0 || (!isAlpha(s[0]) && s[0] != '*') {
225+
return "", s, false
226+
}
227+
i := 0
228+
for _, ch := range []byte(s) {
229+
if !isTChar(ch) && !slices.Contains([]byte(":/"), ch) {
230+
break
231+
}
232+
i++
233+
}
234+
return s[:i], s[i:], true
235+
}
236+
237+
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-byte-sequence.
238+
func consumeByteSequence(s string) (consumed, rest string, ok bool) {
239+
if len(s) == 0 || s[0] != ':' {
240+
return "", s, false
241+
}
242+
for i := 1; i < len(s); i++ {
243+
if ch := s[i]; ch == ':' {
244+
return s[:i+1], s[i+1:], true
245+
}
246+
if ch := s[i]; !isAlpha(ch) && !isDigit(ch) && !slices.Contains([]byte("+/="), ch) {
247+
return "", s, false
248+
}
249+
}
250+
return "", s, false
251+
}
252+
253+
// consumeBoolean consumes a boolean, written as "?0" or "?1", from the start
// of s.
//
// On success it returns the two consumed bytes, the remainder of s, and
// true; otherwise it returns ("", s, false).
//
// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-boolean.
func consumeBoolean(s string) (consumed, rest string, ok bool) {
	if len(s) < 2 || s[0] != '?' {
		return "", s, false
	}
	if s[1] != '0' && s[1] != '1' {
		return "", s, false
	}
	return s[:2], s[2:], true
}
260+
261+
// https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
262+
func consumeBareItem(s string) (consumed, rest string, ok bool) {
263+
if len(s) == 0 {
264+
return "", s, false
265+
}
266+
267+
// TODO(nsh): This is currently only up to date with RFC 8941. Implement
268+
// Date and Display string for full feature parity with RFC 9651.
269+
ch := s[0]
270+
switch {
271+
case ch == '-' || isDigit(ch):
272+
return consumeIntegerOrDecimal(s)
273+
case ch == '"':
274+
return consumeString(s)
275+
case ch == '*' || isAlpha(ch):
276+
return consumeToken(s)
277+
case ch == ':':
278+
return consumeByteSequence(s)
279+
case ch == '?':
280+
return consumeBoolean(s)
281+
default:
282+
return "", s, false
283+
}
284+
}

0 commit comments

Comments
 (0)