Skip to content

Commit 2cf2e2a

Browse files
committed
feat: add wasm SIMD support
This makes the wasm version of llhttp around 5-6% faster on real-world payloads. The produced code looks roughly like this:

    /* Load input */
    input = wasm_v128_load(p);
    /* Find first character that does not match `ranges` */
    single = wasm_i8x16_ne(input, wasm_u8x16_const_splat(0x9));
    mask = single;
    single = wasm_v128_or(
      wasm_i8x16_lt(input, wasm_u8x16_const_splat(' ')),
      wasm_i8x16_gt(input, wasm_u8x16_const_splat('~'))
    );
    mask = wasm_v128_and(mask, single);
    single = wasm_v128_or(
      wasm_i8x16_lt(input, wasm_u8x16_const_splat(0x80)),
      wasm_i8x16_gt(input, wasm_u8x16_const_splat(0xff))
    );
    mask = wasm_v128_and(mask, single);
    match_len = __builtin_ctz(
      0x10000 | wasm_i8x16_bitmask(mask)
    );

It is conceptually similar to the SSE vectorization that we already support, except that we can't perform multiple comparisons at once and have to check each range individually.
1 parent e2ae9d4 commit 2cf2e2a

File tree

5 files changed

+131
-65
lines changed

5 files changed

+131
-65
lines changed

package-lock.json

Lines changed: 6 additions & 47 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@
4343
"llparse-test-fixture": "^5.0.1",
4444
"mocha": "^9.2.2",
4545
"ts-node": "^9.0.0",
46-
"typescript": "^4.0.3"
46+
"typescript": "^5.0.3"
4747
},
4848
"dependencies": {
4949
"debug": "^4.2.0",
5050
"llparse-frontend": "^3.0.0"
5151
}
52-
}
52+
}

src/implementation/c/compilation.ts

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,9 @@ export class Compilation {
8585
const hex: string[] = [];
8686
for (let j = i; j < limit; j++) {
8787
const value = buffer[j];
88+
assert(value !== undefined);
8889

89-
const ch = String.fromCharCode(value);
90-
// `'`, `\`
91-
if (value === 0x27 || value === 0x5c) {
92-
hex.push(`'\\${ch}'`);
93-
} else if (value >= 0x20 && value <= 0x7e) {
94-
hex.push(`'${ch}'`);
95-
} else {
96-
hex.push(`0x${value.toString(16)}`);
97-
}
90+
hex.push(this.toChar(value));
9891
}
9992
let line = ' ' + hex.join(', ');
10093
if (limit !== buffer.length) {
@@ -331,4 +324,16 @@ export class Compilation {
331324
});
332325
return res;
333326
}
327+
328+
public toChar(value: number): string {
329+
const ch = String.fromCharCode(value);
330+
// `'`, `\`
331+
if (value === 0x27 || value === 0x5c) {
332+
return `'\\${ch}'`;
333+
} else if (value >= 0x20 && value <= 0x7e) {
334+
return `'${ch}'`;
335+
} else {
336+
return `0x${value.toString(16)}`;
337+
}
338+
}
334339
}

src/implementation/c/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ export class CCompiler {
4949
out.push('#endif /* __SSE4_2__ */');
5050
out.push('');
5151

52+
out.push('#ifdef __wasm__');
53+
out.push(' #include <sys/types.h>');
54+
out.push(' #include <wasm_simd128.h>');
55+
out.push('#endif /* __wasm__ */');
56+
out.push('');
57+
5258
out.push('#ifdef _MSC_VER');
5359
out.push(' #define ALIGN(n) _declspec(align(n))');
5460
out.push('#else /* !_MSC_VER */');

src/implementation/c/node/table-lookup.ts

Lines changed: 103 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const SSE_RANGES_LEN = 16;
1111
// _mm_cmpestri takes 128bit input
1212
const SSE_RANGES_PAD = 16;
1313
const MAX_SSE_CALLS = 2;
14+
const MAX_WASM_RANGES = 32;
1415
const SSE_ALIGNMENT = 16;
1516

1617
interface ITable {
@@ -34,7 +35,10 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
3435
// Try to vectorize nodes matching characters and looping to themselves
3536
// NOTE: `switch` below triggers when there is not enough characters in the
3637
// stream for vectorized processing.
37-
this.buildSSE(out);
38+
if (this.canVectorize()) {
39+
this.buildSSE(out);
40+
this.buildWASM(out);
41+
}
3842

3943
const current = transform.build(ctx, `*${ctx.posArg()}`);
4044
out.push(`switch (${table.name}[(uint8_t) ${current}]) {`);
@@ -63,9 +67,7 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
6367
out.push('}');
6468
}
6569

66-
private buildSSE(out: string[]): boolean {
67-
const ctx = this.compilation;
68-
70+
private canVectorize(): boolean {
6971
// Transformation is not supported atm
7072
if (this.ref.transform && this.ref.transform.ref.name !== 'id') {
7173
return false;
@@ -83,8 +85,14 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
8385
return false;
8486
}
8587

88+
assert.strictEqual(edge.noAdvance, false);
89+
90+
return true;
91+
}
92+
93+
private buildRanges(edge: frontend.node.TableLookup["edges"][0]): number[] {
8694
// NOTE: keys are sorted
87-
let ranges: number[] = [];
95+
const ranges: number[] = [];
8896
let first: number | undefined;
8997
let last: number | undefined;
9098
for (const key of edge.keys) {
@@ -104,6 +112,16 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
104112
if (first !== undefined && last !== undefined) {
105113
ranges.push(first, last);
106114
}
115+
return ranges;
116+
}
117+
118+
private buildSSE(out: string[]): boolean {
119+
const ctx = this.compilation;
120+
121+
const edge = this.ref.edges[0];
122+
assert(edge !== undefined);
123+
124+
const ranges = this.buildRanges(edge);
107125

108126
if (ranges.length === 0) {
109127
return false;
@@ -118,7 +136,6 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
118136
out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`);
119137
out.push(' __m128i ranges;');
120138
out.push(' __m128i input;');
121-
out.push(' int avail;');
122139
out.push(' int match_len;');
123140
out.push('');
124141
out.push(' /* Load input */');
@@ -145,7 +162,6 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
145162
out.push(` ${ctx.posArg()} += match_len;`);
146163

147164
const tmp: string[] = [];
148-
assert.strictEqual(edge.noAdvance, false);
149165
this.tailTo(tmp, {
150166
noAdvance: true,
151167
node: edge.node,
@@ -167,6 +183,86 @@ export class TableLookup extends Node<frontend.node.TableLookup> {
167183
return true;
168184
}
169185

186+
private buildWASM(out: string[]): boolean {
187+
const ctx = this.compilation;
188+
189+
const edge = this.ref.edges[0];
190+
assert(edge !== undefined);
191+
192+
const ranges = this.buildRanges(edge);
193+
194+
if (ranges.length === 0) {
195+
return false;
196+
}
197+
198+
// Way too many calls would be required
199+
if (ranges.length > MAX_WASM_RANGES) {
200+
return false;
201+
}
202+
203+
out.push('#ifdef __wasm_simd128__');
204+
out.push('off_t align;');
205+
out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`);
206+
out.push(' v128_t input;');
207+
out.push(' v128_t mask;');
208+
out.push(' v128_t single;');
209+
out.push(' int match_len;');
210+
out.push('');
211+
out.push(' /* Load input */');
212+
out.push(` input = wasm_v128_load(${ctx.posArg()});`);
213+
214+
out.push(' /* Find first character that does not match `ranges` */');
215+
function v128(value: number): string {
216+
return `wasm_u8x16_const_splat(${ctx.toChar(value)})`;
217+
}
218+
219+
for (let off = 0; off < ranges.length; off += 2) {
220+
const start = ranges[off];
221+
const end = ranges[off + 1];
222+
assert(start !== undefined);
223+
assert(end !== undefined);
224+
225+
// Same character, equality is sufficient (and faster)
226+
if (start === end) {
227+
out.push(` single = wasm_i8x16_ne(input, ${v128(start)});`);
228+
} else {
229+
out.push(` single = wasm_v128_or(`);
230+
out.push(` wasm_i8x16_lt(input, ${v128(start)}),`);
231+
out.push(` wasm_i8x16_gt(input, ${v128(end)})`);
232+
out.push(' );');
233+
}
234+
235+
if (off === 0) {
236+
out.push(' mask = single;');
237+
} else {
238+
out.push(' mask = wasm_v128_and(mask, single);');
239+
}
240+
}
241+
out.push(' match_len = __builtin_ctz(');
242+
out.push(' 0x10000 | wasm_i8x16_bitmask(mask)');
243+
out.push(' );');
244+
out.push(` ${ctx.posArg()} += match_len;`);
245+
out.push(' if (match_len != 16) {');
246+
{
247+
const tmp: string[] = [];
248+
this.tailTo(tmp, this.ref.otherwise!);
249+
ctx.indent(out, tmp, ' ');
250+
}
251+
out.push(' }');
252+
253+
const tmp: string[] = [];
254+
this.tailTo(tmp, {
255+
noAdvance: true,
256+
node: edge.node,
257+
});
258+
ctx.indent(out, tmp, ' ');
259+
out.push('}');
260+
261+
out.push('#endif /* __wasm_simd128__ */');
262+
263+
return true;
264+
}
265+
170266
private buildTable(): ITable {
171267
const table: number[] = new Array(MAX_CHAR + 1).fill(0);
172268

0 commit comments

Comments
 (0)