diff --git a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h index 3c2da5c40ea342..8aa398e018f7c3 100644 --- a/src/coreclr/debug/ee/amd64/amd64InstrDecode.h +++ b/src/coreclr/debug/ee/amd64/amd64InstrDecode.h @@ -30,12 +30,21 @@ namespace Amd64InstrDecode // I4B // Instruction includes 4 bytes of immediates // I8B // Instruction includes 8 bytes of immediates // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing - // L // Flags depend on L bit in encoding. L__or_ - // LL // Flags depend on L'L bits in EVEX encoding. LL___ - // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector - // W // Flags depend on W bit in encoding. W__or_ - // P // Flags depend on OpSize prefix for encoding. P__or_ - // WP // Flags depend on W bit in encoding and OpSize prefix. WP__or__or_ + // L // Flags depend on L bit in encoding. + // // L__or_ + // // L__or_ + // LL // Flags depend on L'L bits in EVEX encoding. + // // LL___ + // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector + // W // Flags depend on W bit in encoding. + // // W__or_ + // // W__or_ + // P // Flags depend on OpSize prefix for encoding. + // // P__or_ + // // P__or_ + // WP // Flags depend on W bit in encoding and OpSize prefix. + // // WP__or__or_ + // // WP__or__or_ // WLL // Flags depend on W and L'L bits. // // WLL____or___ // bLL // Flags depend on EVEX.b and L'L bits. 
@@ -53,12 +62,12 @@ namespace Amd64InstrDecode I3B, I4B, I8B, - M1st_bLL_M4B_M16B_M32B_M64B, - M1st_bLL_M8B_M16B_M32B_M64B, M1st_I1B_L_M16B_or_M8B, M1st_I1B_LL_M8B_M16B_M32B, + M1st_I1B_W_M8B_or_M2B, M1st_I1B_W_M8B_or_M4B, M1st_I1B_WP_M8B_or_M4B_or_M2B, + M1st_I4B_W_M8B_or_M4B, M1st_L_M32B_or_M16B, M1st_LL_M16B_M32B_M64B, M1st_LL_M2B_M4B_M8B, @@ -76,6 +85,7 @@ namespace Amd64InstrDecode M1st_M8B, M1st_MUnknown, M1st_W_M4B_or_M1B, + M1st_W_M8B_I4B_or_M2B_I2B, M1st_W_M8B_or_M2B, M1st_W_M8B_or_M4B, M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B, @@ -88,6 +98,7 @@ namespace Amd64InstrDecode MOnly_MUnknown, MOnly_P_M6B_or_M4B, MOnly_W_M16B_or_M8B, + MOnly_W_M8B_or_M2B, MOnly_W_M8B_or_M4B, MOnly_WP_M8B_or_M4B_or_M2B, MOnly_WP_M8B_or_M8B_or_M2B, @@ -138,14 +149,14 @@ namespace Amd64InstrDecode MOp_WP_M8B_or_M4B_or_M2B, WP_I4B_or_I4B_or_I2B, WP_I8B_or_I4B_or_I2B, - Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits + Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits. }; - // The following instrForm maps correspond to the amd64 instr maps - // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics - // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp - // - For Vex* the pp is directly included in the encoding - // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form. + // The following instrForm maps correspond to the amd64 instruction maps. + // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics. + // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. For example, 0x123 is opcode 0x12, pp=0x3. 
+ // - For Vex* and EVEX the pp is directly included in the encoding + // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form. // - pp = 0 implies no prefix. // - pp = 1 implies 0x66 OpSize prefix only. // - pp = 2 implies 0xF3 prefix. @@ -153,9 +164,9 @@ namespace Amd64InstrDecode // - For the primary map, pp is not used and is always 0 in the comments. - // Instruction which change forms based on modrm.reg are encoded in this extension table. - // Since there are 8 modrm.reg values, they occur is groups of 8. - // Each group is referenced from the other tables below using Extension|(index >> 3). + // Instructions which change forms based on modrm.reg are encoded in this extension table. + // Since there are 8 modrm.reg values, they occur in groups of 8. + // Each group is referenced from the other tables below using (Extension|(index >> 3)). static const InstrForm instrFormExtension[217] { MOnly_M4B, // Primary:0xd90/0 fld @@ -252,32 +263,32 @@ namespace Amd64InstrDecode MOnly_M1B, // Secondary:0x180/3 prefetcht2 MOnly_W_M8B_or_M4B, // Secondary:0x180/4 nop MOnly_W_M8B_or_M4B, // Secondary:0x180/5 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/6 nop - MOnly_W_M8B_or_M4B, // Secondary:0x180/7 nop + MOnly_M1B, // Secondary:0x180/6 prefetchit1 + MOnly_M1B, // Secondary:0x180/7 prefetchit0 MOnly_M1B, // Secondary:0x181/0 prefetchnta MOnly_M1B, // Secondary:0x181/1 prefetcht0 MOnly_M1B, // Secondary:0x181/2 prefetcht1 MOnly_M1B, // Secondary:0x181/3 prefetcht2 - MOnly_M2B, // Secondary:0x181/4 nop - MOnly_M2B, // Secondary:0x181/5 nop - MOnly_M2B, // Secondary:0x181/6 nop - MOnly_M2B, // Secondary:0x181/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x181/7 nop MOnly_M1B, // Secondary:0x182/0 prefetchnta MOnly_M1B, // Secondary:0x182/1 prefetcht0 MOnly_M1B, // 
Secondary:0x182/2 prefetcht1 MOnly_M1B, // Secondary:0x182/3 prefetcht2 - MOnly_M4B, // Secondary:0x182/4 nop - MOnly_M4B, // Secondary:0x182/5 nop - MOnly_M4B, // Secondary:0x182/6 nop - MOnly_M4B, // Secondary:0x182/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x182/7 nop MOnly_M1B, // Secondary:0x183/0 prefetchnta MOnly_M1B, // Secondary:0x183/1 prefetcht0 MOnly_M1B, // Secondary:0x183/2 prefetcht1 MOnly_M1B, // Secondary:0x183/3 prefetcht2 - MOnly_M4B, // Secondary:0x183/4 nop - MOnly_M4B, // Secondary:0x183/5 nop - MOnly_M4B, // Secondary:0x183/6 nop - MOnly_M4B, // Secondary:0x183/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x183/7 nop MOnly_M1B, // Secondary:0x1c0/0 cldemote MOnly_W_M8B_or_M4B, // Secondary:0x1c0/1 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/2 nop @@ -286,30 +297,30 @@ namespace Amd64InstrDecode MOnly_W_M8B_or_M4B, // Secondary:0x1c0/5 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/6 nop MOnly_W_M8B_or_M4B, // Secondary:0x1c0/7 nop - MOnly_M2B, // Secondary:0x1c1/0 nop - MOnly_M2B, // Secondary:0x1c1/1 nop - MOnly_M2B, // Secondary:0x1c1/2 nop - MOnly_M2B, // Secondary:0x1c1/3 nop - MOnly_M2B, // Secondary:0x1c1/4 nop - MOnly_M2B, // Secondary:0x1c1/5 nop - MOnly_M2B, // Secondary:0x1c1/6 nop - MOnly_M2B, // Secondary:0x1c1/7 nop - MOnly_M4B, // Secondary:0x1c2/0 nop - MOnly_M4B, // Secondary:0x1c2/1 nop - MOnly_M4B, // Secondary:0x1c2/2 nop - MOnly_M4B, // Secondary:0x1c2/3 nop - MOnly_M4B, // Secondary:0x1c2/4 nop - MOnly_M4B, // Secondary:0x1c2/5 nop - MOnly_M4B, // Secondary:0x1c2/6 nop - MOnly_M4B, // Secondary:0x1c2/7 nop - MOnly_M4B, // Secondary:0x1c3/0 nop - MOnly_M4B, // Secondary:0x1c3/1 nop - MOnly_M4B, // Secondary:0x1c3/2 nop - MOnly_M4B, // Secondary:0x1c3/3 nop 
- MOnly_M4B, // Secondary:0x1c3/4 nop - MOnly_M4B, // Secondary:0x1c3/5 nop - MOnly_M4B, // Secondary:0x1c3/6 nop - MOnly_M4B, // Secondary:0x1c3/7 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/0 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/1 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/2 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/3 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/4 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/5 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/6 nop + MOnly_W_M8B_or_M2B, // Secondary:0x1c1/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c2/7 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/0 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/1 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/2 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/3 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/4 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/5 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/6 nop + MOnly_W_M8B_or_M4B, // Secondary:0x1c3/7 nop MOnly_MUnknown, // Secondary:0xae0/0 fxsave,fxsave64 MOnly_MUnknown, // Secondary:0xae0/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae0/2 ldmxcsr @@ -318,24 +329,24 @@ namespace Amd64InstrDecode MOnly_MUnknown, // Secondary:0xae0/5 xrstor,xrstor64 MOnly_MUnknown, // Secondary:0xae0/6 xsaveopt,xsaveopt64 MOnly_M1B, // Secondary:0xae0/7 clflush - MOnly_MUnknown, // Secondary:0xae1/0 fxsave - MOnly_MUnknown, // Secondary:0xae1/1 fxrstor + MOnly_MUnknown, // Secondary:0xae1/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae1/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae1/2 ldmxcsr MOnly_M4B, // Secondary:0xae1/3 stmxcsr None, None, MOnly_M1B, // Secondary:0xae1/6 clwb MOnly_M1B, // Secondary:0xae1/7 
clflushopt - MOnly_MUnknown, // Secondary:0xae2/0 fxsave - MOnly_MUnknown, // Secondary:0xae2/1 fxrstor + MOnly_MUnknown, // Secondary:0xae2/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae2/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae2/2 ldmxcsr MOnly_M4B, // Secondary:0xae2/3 stmxcsr - MOnly_M4B, // Secondary:0xae2/4 ptwrite + MOnly_W_M8B_or_M4B, // Secondary:0xae2/4 ptwrite None, MOnly_M8B, // Secondary:0xae2/6 clrssbsy None, - MOnly_MUnknown, // Secondary:0xae3/0 fxsave - MOnly_MUnknown, // Secondary:0xae3/1 fxrstor + MOnly_MUnknown, // Secondary:0xae3/0 fxsave,fxsave64 + MOnly_MUnknown, // Secondary:0xae3/1 fxrstor,fxrstor64 MOnly_M4B, // Secondary:0xae3/2 ldmxcsr MOnly_M4B, // Secondary:0xae3/3 stmxcsr None, @@ -351,27 +362,27 @@ namespace Amd64InstrDecode MOnly_M8B, // Secondary:0xc70/6 vmptrld MOnly_M8B, // Secondary:0xc70/7 vmptrst None, - MOnly_M8B, // Secondary:0xc71/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc71/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc71/3 xrstors - MOnly_MUnknown, // Secondary:0xc71/4 xsavec - MOnly_MUnknown, // Secondary:0xc71/5 xsaves + MOnly_MUnknown, // Secondary:0xc71/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc71/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc71/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc71/6 vmclear MOnly_M8B, // Secondary:0xc71/7 vmptrst None, - MOnly_M8B, // Secondary:0xc72/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc72/1 cmpxchg16b,cmpxchg8b None, - MOnly_MUnknown, // Secondary:0xc72/3 xrstors - MOnly_MUnknown, // Secondary:0xc72/4 xsavec - MOnly_MUnknown, // Secondary:0xc72/5 xsaves + MOnly_MUnknown, // Secondary:0xc72/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc72/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc72/5 xsaves,xsaves64 MOnly_M8B, // Secondary:0xc72/6 vmxon MOnly_M8B, // Secondary:0xc72/7 vmptrst None, - MOnly_M8B, // Secondary:0xc73/1 cmpxchg8b + MOnly_W_M16B_or_M8B, // Secondary:0xc73/1 cmpxchg16b,cmpxchg8b None, - 
MOnly_MUnknown, // Secondary:0xc73/3 xrstors - MOnly_MUnknown, // Secondary:0xc73/4 xsavec - MOnly_MUnknown, // Secondary:0xc73/5 xsaves + MOnly_MUnknown, // Secondary:0xc73/3 xrstors,xrstors64 + MOnly_MUnknown, // Secondary:0xc73/4 xsavec,xsavec64 + MOnly_MUnknown, // Secondary:0xc73/5 xsaves,xsaves64 None, MOnly_M8B, // Secondary:0xc73/7 vmptrst }; @@ -458,22 +469,22 @@ namespace Amd64InstrDecode None, // 0x4d0 None, // 0x4e0 None, // 0x4f0 - None, // 0x500 push - None, // 0x510 push - None, // 0x520 push - None, // 0x530 push - None, // 0x540 push - None, // 0x550 push - None, // 0x560 push - None, // 0x570 push - None, // 0x580 pop - None, // 0x590 pop - None, // 0x5a0 pop - None, // 0x5b0 pop - None, // 0x5c0 pop - None, // 0x5d0 pop - None, // 0x5e0 pop - None, // 0x5f0 pop + None, // 0x500 push,pushp + None, // 0x510 push,pushp + None, // 0x520 push,pushp + None, // 0x530 push,pushp + None, // 0x540 push,pushp + None, // 0x550 push,pushp + None, // 0x560 push,pushp + None, // 0x570 push,pushp + None, // 0x580 pop,popp + None, // 0x590 pop,popp + None, // 0x5a0 pop,popp + None, // 0x5b0 pop,popp + None, // 0x5c0 pop,popp + None, // 0x5d0 pop,popp + None, // 0x5e0 pop,popp + None, // 0x5f0 pop,popp None, // 0x600 None, // 0x610 None, // 0x620 @@ -539,7 +550,7 @@ namespace Amd64InstrDecode None, // 0x9e0 sahf None, // 0x9f0 lahf I8B, // 0xa00 movabs - I8B, // 0xa10 movabs + I8B, // 0xa10 jmpabs,movabs I8B, // 0xa20 movabs I8B, // 0xa30 movabs None, // 0xa40 movs @@ -641,7 +652,7 @@ namespace Amd64InstrDecode MOnly_M2B, // 0x000 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x001 lldt,ltr,sldt,str,verr,verw MOnly_M2B, // 0x002 lldt,ltr,sldt,str,verr,verw - MOnly_M2B, // 0x003 lldt,ltr,sldt,str,verr,verw + MOnly_M2B, // 0x003 lkgs,lldt,ltr,sldt,str,verr,verw InstrForm(int(Extension)|0x07), // 0x010 InstrForm(int(Extension)|0x08), // 0x011 InstrForm(int(Extension)|0x09), // 0x012 @@ -667,9 +678,9 @@ namespace Amd64InstrDecode None, // 0x062 clts None, // 0x063 clts 
None, // 0x070 sysretd,sysretq - None, // 0x071 sysretd - None, // 0x072 sysretd - None, // 0x073 sysretd + None, // 0x071 sysretd,sysretq + None, // 0x072 sysretd,sysretq + None, // 0x073 sysretd,sysretq None, // 0x080 invd None, // 0x081 invd None, // 0x082 invd @@ -739,9 +750,9 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x0d), // 0x182 InstrForm(int(Extension)|0x0e), // 0x183 MOnly_W_M8B_or_M4B, // 0x190 nop - MOnly_M2B, // 0x191 nop - MOnly_M4B, // 0x192 nop - MOnly_M4B, // 0x193 nop + MOnly_W_M8B_or_M2B, // 0x191 nop + MOnly_W_M8B_or_M4B, // 0x192 nop + MOnly_W_M8B_or_M4B, // 0x193 nop None, // 0x1a0 MOp_MUnknown, // 0x1a1 bndmov MOp_MUnknown, // 0x1a2 bndcl @@ -755,17 +766,17 @@ namespace Amd64InstrDecode InstrForm(int(Extension)|0x11), // 0x1c2 InstrForm(int(Extension)|0x12), // 0x1c3 MOnly_W_M8B_or_M4B, // 0x1d0 nop - MOnly_M2B, // 0x1d1 nop - MOnly_M4B, // 0x1d2 nop - MOnly_M4B, // 0x1d3 nop + MOnly_W_M8B_or_M2B, // 0x1d1 nop + MOnly_W_M8B_or_M4B, // 0x1d2 nop + MOnly_W_M8B_or_M4B, // 0x1d3 nop MOnly_W_M8B_or_M4B, // 0x1e0 nop - MOnly_M2B, // 0x1e1 nop - MOnly_M4B, // 0x1e2 nop - MOnly_M4B, // 0x1e3 nop + MOnly_W_M8B_or_M2B, // 0x1e1 nop + MOnly_W_M8B_or_M4B, // 0x1e2 nop + MOnly_W_M8B_or_M4B, // 0x1e3 nop MOnly_W_M8B_or_M4B, // 0x1f0 nop - MOnly_M2B, // 0x1f1 nop - MOnly_M4B, // 0x1f2 nop - MOnly_M4B, // 0x1f3 nop + MOnly_W_M8B_or_M2B, // 0x1f1 nop + MOnly_W_M8B_or_M4B, // 0x1f2 nop + MOnly_W_M8B_or_M4B, // 0x1f3 nop I1B, // 0x200 mov I1B, // 0x201 mov I1B, // 0x202 mov @@ -808,8 +819,8 @@ namespace Amd64InstrDecode None, // 0x293 MOp_M8B, // 0x2a0 cvtpi2ps MOp_M8B, // 0x2a1 cvtpi2pd - MOp_M4B, // 0x2a2 cvtsi2ss - MOp_M4B, // 0x2a3 cvtsi2sd + MOp_W_M8B_or_M4B, // 0x2a2 cvtsi2ss + MOp_W_M8B_or_M4B, // 0x2a3 cvtsi2sd M1st_M16B, // 0x2b0 movntps M1st_M16B, // 0x2b1 movntpd M1st_M4B, // 0x2b2 movntss @@ -851,9 +862,9 @@ namespace Amd64InstrDecode None, // 0x342 sysenter None, // 0x343 sysenter None, // 0x350 sysexitd,sysexitq - None, // 0x351 
sysexitd - None, // 0x352 sysexitd - None, // 0x353 sysexitd + None, // 0x351 sysexitd,sysexitq + None, // 0x352 sysexitd,sysexitq + None, // 0x353 sysexitd,sysexitq None, // 0x360 None, // 0x361 None, // 0x362 @@ -895,69 +906,69 @@ namespace Amd64InstrDecode None, // 0x3f2 None, // 0x3f3 MOp_W_M8B_or_M4B, // 0x400 cmovo - MOp_M2B, // 0x401 cmovo - MOp_M4B, // 0x402 cmovo - MOp_M4B, // 0x403 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + MOp_W_M8B_or_M4B, // 0x402 cmovo + MOp_W_M8B_or_M4B, // 0x403 cmovo MOp_W_M8B_or_M4B, // 0x410 cmovno - MOp_M2B, // 0x411 cmovno - MOp_M4B, // 0x412 cmovno - MOp_M4B, // 0x413 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + MOp_W_M8B_or_M4B, // 0x412 cmovno + MOp_W_M8B_or_M4B, // 0x413 cmovno MOp_W_M8B_or_M4B, // 0x420 cmovb - MOp_M2B, // 0x421 cmovb - MOp_M4B, // 0x422 cmovb - MOp_M4B, // 0x423 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + MOp_W_M8B_or_M4B, // 0x422 cmovb + MOp_W_M8B_or_M4B, // 0x423 cmovb MOp_W_M8B_or_M4B, // 0x430 cmovae - MOp_M2B, // 0x431 cmovae - MOp_M4B, // 0x432 cmovae - MOp_M4B, // 0x433 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + MOp_W_M8B_or_M4B, // 0x432 cmovae + MOp_W_M8B_or_M4B, // 0x433 cmovae MOp_W_M8B_or_M4B, // 0x440 cmove - MOp_M2B, // 0x441 cmove - MOp_M4B, // 0x442 cmove - MOp_M4B, // 0x443 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + MOp_W_M8B_or_M4B, // 0x442 cmove + MOp_W_M8B_or_M4B, // 0x443 cmove MOp_W_M8B_or_M4B, // 0x450 cmovne - MOp_M2B, // 0x451 cmovne - MOp_M4B, // 0x452 cmovne - MOp_M4B, // 0x453 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + MOp_W_M8B_or_M4B, // 0x452 cmovne + MOp_W_M8B_or_M4B, // 0x453 cmovne MOp_W_M8B_or_M4B, // 0x460 cmovbe - MOp_M2B, // 0x461 cmovbe - MOp_M4B, // 0x462 cmovbe - MOp_M4B, // 0x463 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + MOp_W_M8B_or_M4B, // 0x462 cmovbe + MOp_W_M8B_or_M4B, // 0x463 cmovbe MOp_W_M8B_or_M4B, // 0x470 cmova - MOp_M2B, // 0x471 cmova - MOp_M4B, // 0x472 cmova - MOp_M4B, // 0x473 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + MOp_W_M8B_or_M4B, // 
0x472 cmova + MOp_W_M8B_or_M4B, // 0x473 cmova MOp_W_M8B_or_M4B, // 0x480 cmovs - MOp_M2B, // 0x481 cmovs - MOp_M4B, // 0x482 cmovs - MOp_M4B, // 0x483 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + MOp_W_M8B_or_M4B, // 0x482 cmovs + MOp_W_M8B_or_M4B, // 0x483 cmovs MOp_W_M8B_or_M4B, // 0x490 cmovns - MOp_M2B, // 0x491 cmovns - MOp_M4B, // 0x492 cmovns - MOp_M4B, // 0x493 cmovns + MOp_W_M8B_or_M2B, // 0x491 cmovns + MOp_W_M8B_or_M4B, // 0x492 cmovns + MOp_W_M8B_or_M4B, // 0x493 cmovns MOp_W_M8B_or_M4B, // 0x4a0 cmovp - MOp_M2B, // 0x4a1 cmovp - MOp_M4B, // 0x4a2 cmovp - MOp_M4B, // 0x4a3 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + MOp_W_M8B_or_M4B, // 0x4a2 cmovp + MOp_W_M8B_or_M4B, // 0x4a3 cmovp MOp_W_M8B_or_M4B, // 0x4b0 cmovnp - MOp_M2B, // 0x4b1 cmovnp - MOp_M4B, // 0x4b2 cmovnp - MOp_M4B, // 0x4b3 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + MOp_W_M8B_or_M4B, // 0x4b2 cmovnp + MOp_W_M8B_or_M4B, // 0x4b3 cmovnp MOp_W_M8B_or_M4B, // 0x4c0 cmovl - MOp_M2B, // 0x4c1 cmovl - MOp_M4B, // 0x4c2 cmovl - MOp_M4B, // 0x4c3 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + MOp_W_M8B_or_M4B, // 0x4c2 cmovl + MOp_W_M8B_or_M4B, // 0x4c3 cmovl MOp_W_M8B_or_M4B, // 0x4d0 cmovge - MOp_M2B, // 0x4d1 cmovge - MOp_M4B, // 0x4d2 cmovge - MOp_M4B, // 0x4d3 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + MOp_W_M8B_or_M4B, // 0x4d2 cmovge + MOp_W_M8B_or_M4B, // 0x4d3 cmovge MOp_W_M8B_or_M4B, // 0x4e0 cmovle - MOp_M2B, // 0x4e1 cmovle - MOp_M4B, // 0x4e2 cmovle - MOp_M4B, // 0x4e3 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + MOp_W_M8B_or_M4B, // 0x4e2 cmovle + MOp_W_M8B_or_M4B, // 0x4e3 cmovle MOp_W_M8B_or_M4B, // 0x4f0 cmovg - MOp_M2B, // 0x4f1 cmovg - MOp_M4B, // 0x4f2 cmovg - MOp_M4B, // 0x4f3 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + MOp_W_M8B_or_M4B, // 0x4f2 cmovg + MOp_W_M8B_or_M4B, // 0x4f3 cmovg None, // 0x500 None, // 0x501 None, // 0x502 @@ -1079,7 +1090,7 @@ namespace Amd64InstrDecode None, // 0x6d2 None, // 0x6d3 MOp_W_M8B_or_M4B, // 0x6e0 movd,movq - MOp_M4B, // 0x6e1 movd + 
MOp_W_M8B_or_M4B, // 0x6e1 movd,movq None, // 0x6e2 None, // 0x6e3 MOp_M8B, // 0x6f0 movq @@ -1143,7 +1154,7 @@ namespace Amd64InstrDecode None, // 0x7d2 MOp_M16B, // 0x7d3 hsubps M1st_W_M8B_or_M4B, // 0x7e0 movd,movq - M1st_M4B, // 0x7e1 movd + M1st_W_M8B_or_M4B, // 0x7e1 movd,movq MOp_M8B, // 0x7e2 movq None, // 0x7e3 M1st_M8B, // 0x7f0 movq @@ -1151,67 +1162,67 @@ namespace Amd64InstrDecode M1st_M16B, // 0x7f2 movdqu None, // 0x7f3 I4B, // 0x800 jo - I2B, // 0x801 jo + WP_I4B_or_I4B_or_I2B, // 0x801 jo I4B, // 0x802 jo I4B, // 0x803 jo I4B, // 0x810 jno - I2B, // 0x811 jno + WP_I4B_or_I4B_or_I2B, // 0x811 jno I4B, // 0x812 jno I4B, // 0x813 jno I4B, // 0x820 jb - I2B, // 0x821 jb + WP_I4B_or_I4B_or_I2B, // 0x821 jb I4B, // 0x822 jb I4B, // 0x823 jb I4B, // 0x830 jae - I2B, // 0x831 jae + WP_I4B_or_I4B_or_I2B, // 0x831 jae I4B, // 0x832 jae I4B, // 0x833 jae I4B, // 0x840 je - I2B, // 0x841 je + WP_I4B_or_I4B_or_I2B, // 0x841 je I4B, // 0x842 je I4B, // 0x843 je I4B, // 0x850 jne - I2B, // 0x851 jne + WP_I4B_or_I4B_or_I2B, // 0x851 jne I4B, // 0x852 jne I4B, // 0x853 jne I4B, // 0x860 jbe - I2B, // 0x861 jbe + WP_I4B_or_I4B_or_I2B, // 0x861 jbe I4B, // 0x862 jbe I4B, // 0x863 jbe I4B, // 0x870 ja - I2B, // 0x871 ja + WP_I4B_or_I4B_or_I2B, // 0x871 ja I4B, // 0x872 ja I4B, // 0x873 ja I4B, // 0x880 js - I2B, // 0x881 js + WP_I4B_or_I4B_or_I2B, // 0x881 js I4B, // 0x882 js I4B, // 0x883 js I4B, // 0x890 jns - I2B, // 0x891 jns + WP_I4B_or_I4B_or_I2B, // 0x891 jns I4B, // 0x892 jns I4B, // 0x893 jns I4B, // 0x8a0 jp - I2B, // 0x8a1 jp + WP_I4B_or_I4B_or_I2B, // 0x8a1 jp I4B, // 0x8a2 jp I4B, // 0x8a3 jp I4B, // 0x8b0 jnp - I2B, // 0x8b1 jnp + WP_I4B_or_I4B_or_I2B, // 0x8b1 jnp I4B, // 0x8b2 jnp I4B, // 0x8b3 jnp I4B, // 0x8c0 jl - I2B, // 0x8c1 jl + WP_I4B_or_I4B_or_I2B, // 0x8c1 jl I4B, // 0x8c2 jl I4B, // 0x8c3 jl I4B, // 0x8d0 jge - I2B, // 0x8d1 jge + WP_I4B_or_I4B_or_I2B, // 0x8d1 jge I4B, // 0x8d2 jge I4B, // 0x8d3 jge I4B, // 0x8e0 jle - I2B, // 0x8e1 jle + 
WP_I4B_or_I4B_or_I2B, // 0x8e1 jle I4B, // 0x8e2 jle I4B, // 0x8e3 jle I4B, // 0x8f0 jg - I2B, // 0x8f1 jg + WP_I4B_or_I4B_or_I2B, // 0x8f1 jg I4B, // 0x8f2 jg I4B, // 0x8f3 jg MOnly_M1B, // 0x900 seto @@ -1279,11 +1290,11 @@ namespace Amd64InstrDecode MOnly_M1B, // 0x9f2 setg MOnly_M1B, // 0x9f3 setg None, // 0xa00 push - None, // 0xa01 pushw + None, // 0xa01 push,pushw None, // 0xa02 push None, // 0xa03 push None, // 0xa10 pop - None, // 0xa11 popw + None, // 0xa11 pop,popw None, // 0xa12 pop None, // 0xa13 pop None, // 0xa20 cpuid @@ -1291,17 +1302,17 @@ namespace Amd64InstrDecode None, // 0xa22 cpuid None, // 0xa23 cpuid M1st_W_M8B_or_M4B, // 0xa30 bt - M1st_M2B, // 0xa31 bt - M1st_M4B, // 0xa32 bt - M1st_M4B, // 0xa33 bt + M1st_W_M8B_or_M2B, // 0xa31 bt + M1st_W_M8B_or_M4B, // 0xa32 bt + M1st_W_M8B_or_M4B, // 0xa33 bt M1st_I1B_W_M8B_or_M4B, // 0xa40 shld - M1st_M2B_I1B, // 0xa41 shld - M1st_M4B_I1B, // 0xa42 shld - M1st_M4B_I1B, // 0xa43 shld + M1st_I1B_W_M8B_or_M2B, // 0xa41 shld + M1st_I1B_W_M8B_or_M4B, // 0xa42 shld + M1st_I1B_W_M8B_or_M4B, // 0xa43 shld M1st_W_M8B_or_M4B, // 0xa50 shld - M1st_M2B, // 0xa51 shld - M1st_M4B, // 0xa52 shld - M1st_M4B, // 0xa53 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + M1st_W_M8B_or_M4B, // 0xa52 shld + M1st_W_M8B_or_M4B, // 0xa53 shld None, // 0xa60 None, // 0xa61 None, // 0xa62 @@ -1311,11 +1322,11 @@ namespace Amd64InstrDecode None, // 0xa72 None, // 0xa73 None, // 0xa80 push - None, // 0xa81 pushw + None, // 0xa81 push,pushw None, // 0xa82 push None, // 0xa83 push None, // 0xa90 pop - None, // 0xa91 popw + None, // 0xa91 pop,popw None, // 0xa92 pop None, // 0xa93 pop None, // 0xaa0 rsm @@ -1323,41 +1334,41 @@ namespace Amd64InstrDecode None, // 0xaa2 rsm None, // 0xaa3 rsm M1st_W_M8B_or_M4B, // 0xab0 bts - M1st_M2B, // 0xab1 bts - M1st_M4B, // 0xab2 bts - M1st_M4B, // 0xab3 bts + M1st_W_M8B_or_M2B, // 0xab1 bts + M1st_W_M8B_or_M4B, // 0xab2 bts + M1st_W_M8B_or_M4B, // 0xab3 bts M1st_I1B_W_M8B_or_M4B, // 0xac0 shrd - 
M1st_M2B_I1B, // 0xac1 shrd - M1st_M4B_I1B, // 0xac2 shrd - M1st_M4B_I1B, // 0xac3 shrd + M1st_I1B_W_M8B_or_M2B, // 0xac1 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac2 shrd + M1st_I1B_W_M8B_or_M4B, // 0xac3 shrd M1st_W_M8B_or_M4B, // 0xad0 shrd - M1st_M2B, // 0xad1 shrd - M1st_M4B, // 0xad2 shrd - M1st_M4B, // 0xad3 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + M1st_W_M8B_or_M4B, // 0xad2 shrd + M1st_W_M8B_or_M4B, // 0xad3 shrd InstrForm(int(Extension)|0x13), // 0xae0 InstrForm(int(Extension)|0x14), // 0xae1 InstrForm(int(Extension)|0x15), // 0xae2 InstrForm(int(Extension)|0x16), // 0xae3 MOp_W_M8B_or_M4B, // 0xaf0 imul - MOp_M2B, // 0xaf1 imul - MOp_M4B, // 0xaf2 imul - MOp_M4B, // 0xaf3 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + MOp_W_M8B_or_M4B, // 0xaf2 imul + MOp_W_M8B_or_M4B, // 0xaf3 imul M1st_M1B, // 0xb00 cmpxchg M1st_M1B, // 0xb01 cmpxchg M1st_M1B, // 0xb02 cmpxchg M1st_M1B, // 0xb03 cmpxchg M1st_W_M8B_or_M4B, // 0xb10 cmpxchg - M1st_M2B, // 0xb11 cmpxchg - M1st_M4B, // 0xb12 cmpxchg - M1st_M4B, // 0xb13 cmpxchg + M1st_W_M8B_or_M2B, // 0xb11 cmpxchg + M1st_W_M8B_or_M4B, // 0xb12 cmpxchg + M1st_W_M8B_or_M4B, // 0xb13 cmpxchg MOp_M6B, // 0xb20 lss MOp_M4B, // 0xb21 lss MOp_M6B, // 0xb22 lss MOp_M6B, // 0xb23 lss M1st_W_M8B_or_M4B, // 0xb30 btr - M1st_M2B, // 0xb31 btr - M1st_M4B, // 0xb32 btr - M1st_M4B, // 0xb33 btr + M1st_W_M8B_or_M2B, // 0xb31 btr + M1st_W_M8B_or_M4B, // 0xb32 btr + M1st_W_M8B_or_M4B, // 0xb33 btr MOp_M6B, // 0xb40 lfs MOp_M4B, // 0xb41 lfs MOp_M6B, // 0xb42 lfs @@ -1376,27 +1387,27 @@ namespace Amd64InstrDecode MOp_M2B, // 0xb73 movzx None, // 0xb80 None, // 0xb81 - MOp_M4B, // 0xb82 popcnt + MOp_W_M8B_or_M4B, // 0xb82 popcnt None, // 0xb83 MOp_W_M8B_or_M4B, // 0xb90 ud1 - MOp_M2B, // 0xb91 ud1 - MOp_M4B, // 0xb92 ud1 - MOp_M4B, // 0xb93 ud1 + MOp_W_M8B_or_M2B, // 0xb91 ud1 + MOp_W_M8B_or_M4B, // 0xb92 ud1 + MOp_W_M8B_or_M4B, // 0xb93 ud1 M1st_I1B_W_M8B_or_M4B, // 0xba0 bt,btc,btr,bts - M1st_M2B_I1B, // 0xba1 bt,btc,btr,bts - M1st_M4B_I1B, // 0xba2 
bt,btc,btr,bts - M1st_M4B_I1B, // 0xba3 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M2B, // 0xba1 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba2 bt,btc,btr,bts + M1st_I1B_W_M8B_or_M4B, // 0xba3 bt,btc,btr,bts M1st_W_M8B_or_M4B, // 0xbb0 btc - M1st_M2B, // 0xbb1 btc - M1st_M4B, // 0xbb2 btc - M1st_M4B, // 0xbb3 btc + M1st_W_M8B_or_M2B, // 0xbb1 btc + M1st_W_M8B_or_M4B, // 0xbb2 btc + M1st_W_M8B_or_M4B, // 0xbb3 btc MOp_W_M8B_or_M4B, // 0xbc0 bsf - MOp_M2B, // 0xbc1 bsf - MOp_M4B, // 0xbc2 tzcnt + MOp_W_M8B_or_M2B, // 0xbc1 bsf + MOp_W_M8B_or_M4B, // 0xbc2 tzcnt None, // 0xbc3 MOp_W_M8B_or_M4B, // 0xbd0 bsr - MOp_M2B, // 0xbd1 bsr - MOp_M4B, // 0xbd2 lzcnt + MOp_W_M8B_or_M2B, // 0xbd1 bsr + MOp_W_M8B_or_M4B, // 0xbd2 lzcnt None, // 0xbd3 MOp_M1B, // 0xbe0 movsx MOp_M1B, // 0xbe1 movsx @@ -1411,9 +1422,9 @@ namespace Amd64InstrDecode M1st_M1B, // 0xc02 xadd M1st_M1B, // 0xc03 xadd M1st_W_M8B_or_M4B, // 0xc10 xadd - M1st_M2B, // 0xc11 xadd - M1st_M4B, // 0xc12 xadd - M1st_M4B, // 0xc13 xadd + M1st_W_M8B_or_M2B, // 0xc11 xadd + M1st_W_M8B_or_M4B, // 0xc12 xadd + M1st_W_M8B_or_M4B, // 0xc13 xadd MOp_M16B_I1B, // 0xc20 cmpps MOp_M16B_I1B, // 0xc21 cmppd MOp_M4B_I1B, // 0xc22 cmpss @@ -1659,9 +1670,9 @@ namespace Amd64InstrDecode None, // 0xfe2 None, // 0xfe3 MOp_W_M8B_or_M4B, // 0xff0 ud0 - MOp_M2B, // 0xff1 ud0 - MOp_M4B, // 0xff2 ud0 - MOp_M4B, // 0xff3 ud0 + MOp_W_M8B_or_M2B, // 0xff1 ud0 + MOp_W_M8B_or_M4B, // 0xff2 ud0 + MOp_W_M8B_or_M4B, // 0xff3 ud0 }; static const InstrForm instrFormF38[1024] @@ -2674,10 +2685,10 @@ namespace Amd64InstrDecode None, // 0xfb1 None, // 0xfb2 None, // 0xfb3 - None, // 0xfc0 - None, // 0xfc1 - None, // 0xfc2 - None, // 0xfc3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor None, // 0xfd0 None, // 0xfd1 None, // 0xfd2 @@ -5070,14 +5081,14 @@ namespace Amd64InstrDecode None, // 0x4f1 None, // 0x4f2 None, // 0x4f3 - None, // 0x500 + MOp_L_M32B_or_M16B, // 
0x500 vpdpbuud None, // 0x501 - None, // 0x502 - None, // 0x503 - None, // 0x510 + MOp_L_M32B_or_M16B, // 0x502 vpdpbsud + MOp_L_M32B_or_M16B, // 0x503 vpdpbssd + MOp_L_M32B_or_M16B, // 0x510 vpdpbuuds None, // 0x511 - None, // 0x512 - None, // 0x513 + MOp_L_M32B_or_M16B, // 0x512 vpdpbsuds + MOp_L_M32B_or_M16B, // 0x513 vpdpbssds None, // 0x520 None, // 0x521 None, // 0x522 @@ -5454,13 +5465,13 @@ namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0xaf1 vfnmsub213sd,vfnmsub213ss None, // 0xaf2 None, // 0xaf3 - None, // 0xb00 - None, // 0xb01 - None, // 0xb02 - None, // 0xb03 + MOp_L_M32B_or_M16B, // 0xb00 vcvtneoph2ps + MOp_L_M32B_or_M16B, // 0xb01 vcvtneeph2ps + MOp_L_M32B_or_M16B, // 0xb02 vcvtneebf162ps + MOp_L_M32B_or_M16B, // 0xb03 vcvtneobf162ps None, // 0xb10 - None, // 0xb11 - None, // 0xb12 + MOp_M2B, // 0xb11 vbcstnesh2ps + MOp_M2B, // 0xb12 vbcstnebf162ps None, // 0xb13 None, // 0xb20 None, // 0xb21 @@ -5590,13 +5601,13 @@ namespace Amd64InstrDecode None, // 0xd11 None, // 0xd12 None, // 0xd13 - None, // 0xd20 - None, // 0xd21 - None, // 0xd22 + MOp_L_M32B_or_M16B, // 0xd20 vpdpwuud + MOp_L_M32B_or_M16B, // 0xd21 vpdpwusd + MOp_L_M32B_or_M16B, // 0xd22 vpdpwsud None, // 0xd23 - None, // 0xd30 - None, // 0xd31 - None, // 0xd32 + MOp_L_M32B_or_M16B, // 0xd30 vpdpwuuds + MOp_L_M32B_or_M16B, // 0xd31 vpdpwusds + MOp_L_M32B_or_M16B, // 0xd32 vpdpwsuds None, // 0xd33 None, // 0xd40 None, // 0xd41 @@ -5622,10 +5633,10 @@ namespace Amd64InstrDecode None, // 0xd91 None, // 0xd92 None, // 0xd93 - None, // 0xda0 - None, // 0xda1 - None, // 0xda2 - None, // 0xda3 + MOp_M16B, // 0xda0 vsm3msg1 + MOp_M16B, // 0xda1 vsm3msg2 + MOp_L_M32B_or_M16B, // 0xda2 vsm4key4 + MOp_L_M32B_or_M16B, // 0xda3 vsm4rnds4 None, // 0xdb0 MOp_M16B, // 0xdb1 vaesimc None, // 0xdb2 @@ -5647,67 +5658,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 
+ M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, // 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -6667,7 +6678,7 @@ namespace Amd64InstrDecode None, // 0xdd2 None, // 0xdd3 None, // 0xde0 - None, // 0xde1 + MOp_M16B_I1B, // 0xde1 vsm3rnds2 None, // 0xde2 None, // 0xde3 None, // 0xdf0 @@ -6978,8 +6989,8 @@ namespace Amd64InstrDecode None, // 0x2a1 MOp_W_M8B_or_M4B, // 0x2a2 vcvtsi2ss MOp_W_M8B_or_M4B, // 0x2a3 vcvtsi2sd - M1st_bLL_M4B_M16B_M32B_M64B, // 0x2b0 vmovntps - M1st_bLL_M8B_M16B_M32B_M64B, // 0x2b1 vmovntpd + M1st_LL_M16B_M32B_M64B, // 0x2b0 vmovntps + 
M1st_LL_M16B_M32B_M64B, // 0x2b1 vmovntpd None, // 0x2b2 None, // 0x2b3 None, // 0x2c0 @@ -7130,8 +7141,8 @@ namespace Amd64InstrDecode None, // 0x501 None, // 0x502 None, // 0x503 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vsqrtps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x510 vsqrtps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vsqrtpd MOp_M4B, // 0x512 vsqrtss MOp_M8B, // 0x513 vsqrtsd None, // 0x520 @@ -7158,12 +7169,12 @@ namespace Amd64InstrDecode MOp_bLL_M8B_M16B_M32B_M64B, // 0x571 vxorpd None, // 0x572 None, // 0x573 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x580 vaddps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x581 vaddpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x580 vaddps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x581 vaddpd MOp_M4B, // 0x582 vaddss MOp_M8B, // 0x583 vaddsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x590 vmulps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x591 vmulpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x590 vmulps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x591 vmulpd MOp_M4B, // 0x592 vmulss MOp_M8B, // 0x593 vmulsd MOp_bLL_M4B_M8B_M16B_M32B, // 0x5a0 vcvtps2pd @@ -7174,20 +7185,20 @@ namespace Amd64InstrDecode MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b1 vcvtps2dq MOp_bLL_M4B_M16B_M32B_M64B, // 0x5b2 vcvttps2dq None, // 0x5b3 - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5c0 vsubps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c0 vsubps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5c1 vsubpd MOp_M4B, // 0x5c2 vsubss MOp_M8B, // 0x5c3 vsubsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5d0 vminps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5d1 vminpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d0 vminps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5d1 vminpd MOp_M4B, // 0x5d2 vminss MOp_M8B, // 0x5d3 vminsd - MOp_bLL_M4B_M16B_M32B_M64B, // 0x5e0 vdivps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e0 vdivps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5e1 vdivpd MOp_M4B, // 0x5e2 vdivss MOp_M8B, // 0x5e3 vdivsd - 
MOp_bLL_M4B_M16B_M32B_M64B, // 0x5f0 vmaxps - MOp_bLL_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f0 vmaxps + MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x5f1 vmaxpd MOp_M4B, // 0x5f2 vmaxss MOp_M8B, // 0x5f3 vmaxsd None, // 0x600 @@ -7382,12 +7393,12 @@ namespace Amd64InstrDecode None, // 0x8f1 None, // 0x8f2 None, // 0x8f3 - None, // 0x900 - None, // 0x901 + MOp_W_M8B_or_M2B, // 0x900 kmovq,kmovw + MOp_W_M4B_or_M1B, // 0x901 kmovb,kmovd None, // 0x902 None, // 0x903 - None, // 0x910 - None, // 0x911 + M1st_W_M8B_or_M2B, // 0x910 kmovq,kmovw + M1st_W_M4B_or_M1B, // 0x911 kmovb,kmovd None, // 0x912 None, // 0x913 None, // 0x920 @@ -8000,7 +8011,7 @@ namespace Amd64InstrDecode None, // 0x283 None, // 0x290 MOp_bLL_M8B_M16B_M32B_M64B, // 0x291 vpcmpeqq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x292 vpmovb2m,vpmovw2m + None, // 0x292 None, // 0x293 None, // 0x2a0 MOp_LL_M16B_M32B_M64B, // 0x2a1 vmovntdqa @@ -8064,7 +8075,7 @@ namespace Amd64InstrDecode None, // 0x383 None, // 0x390 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x391 vpminsd,vpminsq - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x392 vpmovd2m,vpmovq2m + None, // 0x392 None, // 0x393 None, // 0x3a0 MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x3a1 vpminuw @@ -8126,8 +8137,8 @@ namespace Amd64InstrDecode None, // 0x481 None, // 0x482 None, // 0x483 - None, // 0x490 - None, // 0x491 + MOnly_MUnknown, // 0x490 ldtilecfg + MOnly_MUnknown, // 0x491 sttilecfg None, // 0x492 None, // 0x493 None, // 0x4a0 @@ -8154,20 +8165,20 @@ namespace Amd64InstrDecode MOp_W_M8B_or_M4B, // 0x4f1 vrsqrt14sd,vrsqrt14ss None, // 0x4f2 None, // 0x4f3 - None, // 0x500 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x501 vpdpbusd - None, // 0x502 - None, // 0x503 - None, // 0x510 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x511 vpdpbusds - None, // 0x512 - None, // 0x513 + MOp_bLL_M4B_M16B_M32B_M64B, // 0x500 vpdpbuud + MOp_bLL_M4B_M16B_M32B_M64B, // 0x501 vpdpbusd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x502 vpdpbsud + MOp_bLL_M4B_M16B_M32B_M64B, // 
0x503 vpdpbssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x510 vpdpbuuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x511 vpdpbusds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x512 vpdpbsuds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x513 vpdpbssds None, // 0x520 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x521 vpdpwssd + MOp_bLL_M4B_M16B_M32B_M64B, // 0x521 vpdpwssd MOp_bLL_M4B_M16B_M32B_M64B, // 0x522 vdpbf16ps MOp_M16B, // 0x523 vp4dpwssd None, // 0x530 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0x531 vpdpwssds + MOp_bLL_M4B_M16B_M32B_M64B, // 0x531 vpdpwssds None, // 0x532 MOp_M16B, // 0x533 vp4dpwssds None, // 0x540 @@ -8555,11 +8566,11 @@ namespace Amd64InstrDecode None, // 0xb32 None, // 0xb33 None, // 0xb40 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq + MOp_bLL_M8B_M16B_M32B_M64B, // 0xb41 vpmadd52luq None, // 0xb42 None, // 0xb43 None, // 0xb50 - MOp_bWLL_M4B_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq + MOp_bLL_M8B_M16B_M32B_M64B, // 0xb51 vpmadd52huq None, // 0xb52 None, // 0xb53 None, // 0xb60 @@ -8731,67 +8742,67 @@ namespace Amd64InstrDecode None, // 0xdf2 None, // 0xdf3 None, // 0xe00 - None, // 0xe01 + M1st_W_M8B_or_M4B, // 0xe01 cmpoxadd None, // 0xe02 None, // 0xe03 None, // 0xe10 - None, // 0xe11 + M1st_W_M8B_or_M4B, // 0xe11 cmpnoxadd None, // 0xe12 None, // 0xe13 None, // 0xe20 - None, // 0xe21 + M1st_W_M8B_or_M4B, // 0xe21 cmpbxadd None, // 0xe22 None, // 0xe23 None, // 0xe30 - None, // 0xe31 + M1st_W_M8B_or_M4B, // 0xe31 cmpnbxadd None, // 0xe32 None, // 0xe33 None, // 0xe40 - None, // 0xe41 + M1st_W_M8B_or_M4B, // 0xe41 cmpzxadd None, // 0xe42 None, // 0xe43 None, // 0xe50 - None, // 0xe51 + M1st_W_M8B_or_M4B, // 0xe51 cmpnzxadd None, // 0xe52 None, // 0xe53 None, // 0xe60 - None, // 0xe61 + M1st_W_M8B_or_M4B, // 0xe61 cmpbexadd None, // 0xe62 None, // 0xe63 None, // 0xe70 - None, // 0xe71 + M1st_W_M8B_or_M4B, // 0xe71 cmpnbexadd None, // 0xe72 None, // 0xe73 None, // 0xe80 - None, // 0xe81 + M1st_W_M8B_or_M4B, // 0xe81 cmpsxadd None, // 0xe82 None, // 0xe83 None, // 0xe90 - None, 
// 0xe91 + M1st_W_M8B_or_M4B, // 0xe91 cmpnsxadd None, // 0xe92 None, // 0xe93 None, // 0xea0 - None, // 0xea1 + M1st_W_M8B_or_M4B, // 0xea1 cmppxadd None, // 0xea2 None, // 0xea3 None, // 0xeb0 - None, // 0xeb1 + M1st_W_M8B_or_M4B, // 0xeb1 cmpnpxadd None, // 0xeb2 None, // 0xeb3 None, // 0xec0 - None, // 0xec1 + M1st_W_M8B_or_M4B, // 0xec1 cmplxadd None, // 0xec2 None, // 0xec3 None, // 0xed0 - None, // 0xed1 + M1st_W_M8B_or_M4B, // 0xed1 cmpnlxadd None, // 0xed2 None, // 0xed3 None, // 0xee0 - None, // 0xee1 + M1st_W_M8B_or_M4B, // 0xee1 cmplexadd None, // 0xee2 None, // 0xee3 None, // 0xef0 - None, // 0xef1 + M1st_W_M8B_or_M4B, // 0xef1 cmpnlexadd None, // 0xef2 None, // 0xef3 None, // 0xf00 @@ -8802,11 +8813,11 @@ namespace Amd64InstrDecode None, // 0xf11 None, // 0xf12 None, // 0xf13 - None, // 0xf20 + MOp_W_M8B_or_M4B, // 0xf20 andn None, // 0xf21 None, // 0xf22 None, // 0xf23 - None, // 0xf30 + MOp_W_M8B_or_M4B, // 0xf30 blsi,blsmsk,blsr None, // 0xf31 None, // 0xf32 None, // 0xf33 @@ -8814,18 +8825,18 @@ namespace Amd64InstrDecode None, // 0xf41 None, // 0xf42 None, // 0xf43 - None, // 0xf50 + MOp_W_M8B_or_M4B, // 0xf50 bzhi None, // 0xf51 - None, // 0xf52 - None, // 0xf53 + MOp_W_M8B_or_M4B, // 0xf52 pext + MOp_W_M8B_or_M4B, // 0xf53 pdep None, // 0xf60 None, // 0xf61 None, // 0xf62 - None, // 0xf63 - None, // 0xf70 - None, // 0xf71 - None, // 0xf72 - None, // 0xf73 + MOp_W_M8B_or_M4B, // 0xf63 mulx + MOp_W_M8B_or_M4B, // 0xf70 bextr + MOp_W_M8B_or_M4B, // 0xf71 shlx + MOp_W_M8B_or_M4B, // 0xf72 sarx + MOp_W_M8B_or_M4B, // 0xf73 shrx None, // 0xf80 None, // 0xf81 None, // 0xf82 @@ -9691,7 +9702,7 @@ namespace Amd64InstrDecode None, // 0xce2 None, // 0xce3 None, // 0xcf0 - None, // 0xcf1 + MOp_I1B_bLL_M8B_M16B_M32B_M64B, // 0xcf1 vgf2p8affineinvqb None, // 0xcf2 None, // 0xcf3 None, // 0xd00 @@ -9825,7 +9836,7 @@ namespace Amd64InstrDecode None, // 0xf00 None, // 0xf01 None, // 0xf02 - None, // 0xf03 + MOp_I1B_W_M8B_or_M4B, // 0xf03 rorx None, // 0xf10 
None, // 0xf11 None, // 0xf12 @@ -9887,4 +9898,1032 @@ namespace Amd64InstrDecode None, // 0xff2 None, // 0xff3 }; + + static const InstrForm instrFormEvex_4[1024] + { + M1st_M1B, // 0x000 add + None, // 0x001 + None, // 0x002 + None, // 0x003 + M1st_W_M8B_or_M4B, // 0x010 add + M1st_W_M8B_or_M2B, // 0x011 add + None, // 0x012 + None, // 0x013 + MOp_M1B, // 0x020 add + None, // 0x021 + None, // 0x022 + None, // 0x023 + MOp_W_M8B_or_M4B, // 0x030 add + MOp_W_M8B_or_M2B, // 0x031 add + None, // 0x032 + None, // 0x033 + None, // 0x040 + None, // 0x041 + None, // 0x042 + None, // 0x043 + None, // 0x050 + None, // 0x051 + None, // 0x052 + None, // 0x053 + None, // 0x060 + None, // 0x061 + None, // 0x062 + None, // 0x063 + None, // 0x070 + None, // 0x071 + None, // 0x072 + None, // 0x073 + M1st_M1B, // 0x080 or + None, // 0x081 + None, // 0x082 + None, // 0x083 + M1st_W_M8B_or_M4B, // 0x090 or + M1st_W_M8B_or_M2B, // 0x091 or + None, // 0x092 + None, // 0x093 + MOp_M1B, // 0x0a0 or + None, // 0x0a1 + None, // 0x0a2 + None, // 0x0a3 + MOp_W_M8B_or_M4B, // 0x0b0 or + MOp_W_M8B_or_M2B, // 0x0b1 or + None, // 0x0b2 + None, // 0x0b3 + None, // 0x0c0 + None, // 0x0c1 + None, // 0x0c2 + None, // 0x0c3 + None, // 0x0d0 + None, // 0x0d1 + None, // 0x0d2 + None, // 0x0d3 + None, // 0x0e0 + None, // 0x0e1 + None, // 0x0e2 + None, // 0x0e3 + None, // 0x0f0 + None, // 0x0f1 + None, // 0x0f2 + None, // 0x0f3 + M1st_M1B, // 0x100 adc + None, // 0x101 + None, // 0x102 + None, // 0x103 + M1st_W_M8B_or_M4B, // 0x110 adc + M1st_W_M8B_or_M2B, // 0x111 adc + None, // 0x112 + None, // 0x113 + MOp_M1B, // 0x120 adc + None, // 0x121 + None, // 0x122 + None, // 0x123 + MOp_W_M8B_or_M4B, // 0x130 adc + MOp_W_M8B_or_M2B, // 0x131 adc + None, // 0x132 + None, // 0x133 + None, // 0x140 + None, // 0x141 + None, // 0x142 + None, // 0x143 + None, // 0x150 + None, // 0x151 + None, // 0x152 + None, // 0x153 + None, // 0x160 + None, // 0x161 + None, // 0x162 + None, // 0x163 + None, // 0x170 + None, // 
0x171 + None, // 0x172 + None, // 0x173 + M1st_M1B, // 0x180 sbb + None, // 0x181 + None, // 0x182 + None, // 0x183 + M1st_W_M8B_or_M4B, // 0x190 sbb + M1st_W_M8B_or_M2B, // 0x191 sbb + None, // 0x192 + None, // 0x193 + MOp_M1B, // 0x1a0 sbb + None, // 0x1a1 + None, // 0x1a2 + None, // 0x1a3 + MOp_W_M8B_or_M4B, // 0x1b0 sbb + MOp_W_M8B_or_M2B, // 0x1b1 sbb + None, // 0x1b2 + None, // 0x1b3 + None, // 0x1c0 + None, // 0x1c1 + None, // 0x1c2 + None, // 0x1c3 + None, // 0x1d0 + None, // 0x1d1 + None, // 0x1d2 + None, // 0x1d3 + None, // 0x1e0 + None, // 0x1e1 + None, // 0x1e2 + None, // 0x1e3 + None, // 0x1f0 + None, // 0x1f1 + None, // 0x1f2 + None, // 0x1f3 + M1st_M1B, // 0x200 and + None, // 0x201 + None, // 0x202 + None, // 0x203 + M1st_W_M8B_or_M4B, // 0x210 and + M1st_W_M8B_or_M2B, // 0x211 and + None, // 0x212 + None, // 0x213 + MOp_M1B, // 0x220 and + None, // 0x221 + None, // 0x222 + None, // 0x223 + MOp_W_M8B_or_M4B, // 0x230 and + MOp_W_M8B_or_M2B, // 0x231 and + None, // 0x232 + None, // 0x233 + M1st_I1B_W_M8B_or_M4B, // 0x240 shld + M1st_I1B_W_M8B_or_M2B, // 0x241 shld + None, // 0x242 + None, // 0x243 + None, // 0x250 + None, // 0x251 + None, // 0x252 + None, // 0x253 + None, // 0x260 + None, // 0x261 + None, // 0x262 + None, // 0x263 + None, // 0x270 + None, // 0x271 + None, // 0x272 + None, // 0x273 + M1st_M1B, // 0x280 sub + None, // 0x281 + None, // 0x282 + None, // 0x283 + M1st_W_M8B_or_M4B, // 0x290 sub + M1st_W_M8B_or_M2B, // 0x291 sub + None, // 0x292 + None, // 0x293 + MOp_M1B, // 0x2a0 sub + None, // 0x2a1 + None, // 0x2a2 + None, // 0x2a3 + MOp_W_M8B_or_M4B, // 0x2b0 sub + MOp_W_M8B_or_M2B, // 0x2b1 sub + None, // 0x2b2 + None, // 0x2b3 + M1st_I1B_W_M8B_or_M4B, // 0x2c0 shrd + M1st_I1B_W_M8B_or_M2B, // 0x2c1 shrd + None, // 0x2c2 + None, // 0x2c3 + None, // 0x2d0 + None, // 0x2d1 + None, // 0x2d2 + None, // 0x2d3 + None, // 0x2e0 + None, // 0x2e1 + None, // 0x2e2 + None, // 0x2e3 + None, // 0x2f0 + None, // 0x2f1 + None, // 0x2f2 + None, // 
0x2f3 + M1st_M1B, // 0x300 xor + None, // 0x301 + None, // 0x302 + None, // 0x303 + M1st_W_M8B_or_M4B, // 0x310 xor + M1st_W_M8B_or_M2B, // 0x311 xor + None, // 0x312 + None, // 0x313 + MOp_M1B, // 0x320 xor + None, // 0x321 + None, // 0x322 + None, // 0x323 + MOp_W_M8B_or_M4B, // 0x330 xor + MOp_W_M8B_or_M2B, // 0x331 xor + None, // 0x332 + None, // 0x333 + None, // 0x340 + None, // 0x341 + None, // 0x342 + None, // 0x343 + None, // 0x350 + None, // 0x351 + None, // 0x352 + None, // 0x353 + None, // 0x360 + None, // 0x361 + None, // 0x362 + None, // 0x363 + None, // 0x370 + None, // 0x371 + None, // 0x372 + None, // 0x373 + None, // 0x380 + None, // 0x381 + None, // 0x382 + None, // 0x383 + None, // 0x390 + None, // 0x391 + None, // 0x392 + None, // 0x393 + None, // 0x3a0 + None, // 0x3a1 + None, // 0x3a2 + None, // 0x3a3 + None, // 0x3b0 + None, // 0x3b1 + None, // 0x3b2 + None, // 0x3b3 + None, // 0x3c0 + None, // 0x3c1 + None, // 0x3c2 + None, // 0x3c3 + None, // 0x3d0 + None, // 0x3d1 + None, // 0x3d2 + None, // 0x3d3 + None, // 0x3e0 + None, // 0x3e1 + None, // 0x3e2 + None, // 0x3e3 + None, // 0x3f0 + None, // 0x3f1 + None, // 0x3f2 + None, // 0x3f3 + MOp_W_M8B_or_M4B, // 0x400 cmovo + MOp_W_M8B_or_M2B, // 0x401 cmovo + None, // 0x402 + None, // 0x403 + MOp_W_M8B_or_M4B, // 0x410 cmovno + MOp_W_M8B_or_M2B, // 0x411 cmovno + None, // 0x412 + None, // 0x413 + MOp_W_M8B_or_M4B, // 0x420 cmovb + MOp_W_M8B_or_M2B, // 0x421 cmovb + None, // 0x422 + None, // 0x423 + MOp_W_M8B_or_M4B, // 0x430 cmovae + MOp_W_M8B_or_M2B, // 0x431 cmovae + None, // 0x432 + None, // 0x433 + MOp_W_M8B_or_M4B, // 0x440 cmove + MOp_W_M8B_or_M2B, // 0x441 cmove + None, // 0x442 + None, // 0x443 + MOp_W_M8B_or_M4B, // 0x450 cmovne + MOp_W_M8B_or_M2B, // 0x451 cmovne + None, // 0x452 + None, // 0x453 + MOp_W_M8B_or_M4B, // 0x460 cmovbe + MOp_W_M8B_or_M2B, // 0x461 cmovbe + None, // 0x462 + None, // 0x463 + MOp_W_M8B_or_M4B, // 0x470 cmova + MOp_W_M8B_or_M2B, // 0x471 cmova + None, // 0x472 + 
None, // 0x473 + MOp_W_M8B_or_M4B, // 0x480 cmovs + MOp_W_M8B_or_M2B, // 0x481 cmovs + None, // 0x482 + None, // 0x483 + MOp_W_M8B_or_M4B, // 0x490 cmovns + MOp_W_M8B_or_M2B, // 0x491 cmovns + None, // 0x492 + None, // 0x493 + MOp_W_M8B_or_M4B, // 0x4a0 cmovp + MOp_W_M8B_or_M2B, // 0x4a1 cmovp + None, // 0x4a2 + None, // 0x4a3 + MOp_W_M8B_or_M4B, // 0x4b0 cmovnp + MOp_W_M8B_or_M2B, // 0x4b1 cmovnp + None, // 0x4b2 + None, // 0x4b3 + MOp_W_M8B_or_M4B, // 0x4c0 cmovl + MOp_W_M8B_or_M2B, // 0x4c1 cmovl + None, // 0x4c2 + None, // 0x4c3 + MOp_W_M8B_or_M4B, // 0x4d0 cmovge + MOp_W_M8B_or_M2B, // 0x4d1 cmovge + None, // 0x4d2 + None, // 0x4d3 + MOp_W_M8B_or_M4B, // 0x4e0 cmovle + MOp_W_M8B_or_M2B, // 0x4e1 cmovle + None, // 0x4e2 + None, // 0x4e3 + MOp_W_M8B_or_M4B, // 0x4f0 cmovg + MOp_W_M8B_or_M2B, // 0x4f1 cmovg + None, // 0x4f2 + None, // 0x4f3 + None, // 0x500 + None, // 0x501 + None, // 0x502 + None, // 0x503 + None, // 0x510 + None, // 0x511 + None, // 0x512 + None, // 0x513 + None, // 0x520 + None, // 0x521 + None, // 0x522 + None, // 0x523 + None, // 0x530 + None, // 0x531 + None, // 0x532 + None, // 0x533 + None, // 0x540 + None, // 0x541 + None, // 0x542 + None, // 0x543 + None, // 0x550 + None, // 0x551 + None, // 0x552 + None, // 0x553 + None, // 0x560 + None, // 0x561 + None, // 0x562 + None, // 0x563 + None, // 0x570 + None, // 0x571 + None, // 0x572 + None, // 0x573 + None, // 0x580 + None, // 0x581 + None, // 0x582 + None, // 0x583 + None, // 0x590 + None, // 0x591 + None, // 0x592 + None, // 0x593 + None, // 0x5a0 + None, // 0x5a1 + None, // 0x5a2 + None, // 0x5a3 + None, // 0x5b0 + None, // 0x5b1 + None, // 0x5b2 + None, // 0x5b3 + None, // 0x5c0 + None, // 0x5c1 + None, // 0x5c2 + None, // 0x5c3 + None, // 0x5d0 + None, // 0x5d1 + None, // 0x5d2 + None, // 0x5d3 + None, // 0x5e0 + None, // 0x5e1 + None, // 0x5e2 + None, // 0x5e3 + None, // 0x5f0 + None, // 0x5f1 + None, // 0x5f2 + None, // 0x5f3 + MOp_W_M8B_or_M4B, // 0x600 movbe + MOp_W_M8B_or_M2B, 
// 0x601 movbe + None, // 0x602 + None, // 0x603 + M1st_W_M8B_or_M4B, // 0x610 movbe + M1st_W_M8B_or_M2B, // 0x611 movbe + None, // 0x612 + None, // 0x613 + None, // 0x620 + None, // 0x621 + None, // 0x622 + None, // 0x623 + None, // 0x630 + None, // 0x631 + None, // 0x632 + None, // 0x633 + None, // 0x640 + None, // 0x641 + None, // 0x642 + None, // 0x643 + None, // 0x650 + M1st_MUnknown, // 0x651 wrussd,wrussq + None, // 0x652 + None, // 0x653 + M1st_MUnknown, // 0x660 wrssd,wrssq + MOp_W_M8B_or_M4B, // 0x661 adcx + MOp_W_M8B_or_M4B, // 0x662 adox + None, // 0x663 + None, // 0x670 + None, // 0x671 + None, // 0x672 + None, // 0x673 + None, // 0x680 + None, // 0x681 + None, // 0x682 + None, // 0x683 + None, // 0x690 + None, // 0x691 + None, // 0x692 + None, // 0x693 + None, // 0x6a0 + None, // 0x6a1 + None, // 0x6a2 + None, // 0x6a3 + None, // 0x6b0 + None, // 0x6b1 + None, // 0x6b2 + None, // 0x6b3 + None, // 0x6c0 + None, // 0x6c1 + None, // 0x6c2 + None, // 0x6c3 + None, // 0x6d0 + None, // 0x6d1 + None, // 0x6d2 + None, // 0x6d3 + None, // 0x6e0 + None, // 0x6e1 + None, // 0x6e2 + None, // 0x6e3 + None, // 0x6f0 + None, // 0x6f1 + None, // 0x6f2 + None, // 0x6f3 + None, // 0x700 + None, // 0x701 + None, // 0x702 + None, // 0x703 + None, // 0x710 + None, // 0x711 + None, // 0x712 + None, // 0x713 + None, // 0x720 + None, // 0x721 + None, // 0x722 + None, // 0x723 + None, // 0x730 + None, // 0x731 + None, // 0x732 + None, // 0x733 + None, // 0x740 + None, // 0x741 + None, // 0x742 + None, // 0x743 + None, // 0x750 + None, // 0x751 + None, // 0x752 + None, // 0x753 + None, // 0x760 + None, // 0x761 + None, // 0x762 + None, // 0x763 + None, // 0x770 + None, // 0x771 + None, // 0x772 + None, // 0x773 + None, // 0x780 + None, // 0x781 + None, // 0x782 + None, // 0x783 + None, // 0x790 + None, // 0x791 + None, // 0x792 + None, // 0x793 + None, // 0x7a0 + None, // 0x7a1 + None, // 0x7a2 + None, // 0x7a3 + None, // 0x7b0 + None, // 0x7b1 + None, // 0x7b2 + None, // 
0x7b3 + None, // 0x7c0 + None, // 0x7c1 + None, // 0x7c2 + None, // 0x7c3 + None, // 0x7d0 + None, // 0x7d1 + None, // 0x7d2 + None, // 0x7d3 + None, // 0x7e0 + None, // 0x7e1 + None, // 0x7e2 + None, // 0x7e3 + None, // 0x7f0 + None, // 0x7f1 + None, // 0x7f2 + None, // 0x7f3 + M1st_M1B_I1B, // 0x800 adc,add,and,or,sbb,sub,xor + None, // 0x801 + None, // 0x802 + None, // 0x803 + M1st_I4B_W_M8B_or_M4B, // 0x810 adc,add,and,or,sbb,sub,xor + M1st_W_M8B_I4B_or_M2B_I2B, // 0x811 adc,add,and,or,sbb,sub,xor + None, // 0x812 + None, // 0x813 + None, // 0x820 + None, // 0x821 + None, // 0x822 + None, // 0x823 + M1st_I1B_W_M8B_or_M4B, // 0x830 adc,add,and,or,sbb,sub,xor + M1st_I1B_W_M8B_or_M2B, // 0x831 adc,add,and,or,sbb,sub,xor + None, // 0x832 + None, // 0x833 + None, // 0x840 + None, // 0x841 + None, // 0x842 + None, // 0x843 + None, // 0x850 + None, // 0x851 + None, // 0x852 + None, // 0x853 + None, // 0x860 + None, // 0x861 + None, // 0x862 + None, // 0x863 + None, // 0x870 + None, // 0x871 + None, // 0x872 + None, // 0x873 + None, // 0x880 + None, // 0x881 + None, // 0x882 + None, // 0x883 + None, // 0x890 + None, // 0x891 + None, // 0x892 + None, // 0x893 + None, // 0x8a0 + None, // 0x8a1 + None, // 0x8a2 + None, // 0x8a3 + None, // 0x8b0 + None, // 0x8b1 + None, // 0x8b2 + None, // 0x8b3 + None, // 0x8c0 + None, // 0x8c1 + None, // 0x8c2 + None, // 0x8c3 + None, // 0x8d0 + None, // 0x8d1 + None, // 0x8d2 + None, // 0x8d3 + None, // 0x8e0 + None, // 0x8e1 + None, // 0x8e2 + None, // 0x8e3 + None, // 0x8f0 + None, // 0x8f1 + None, // 0x8f2 + None, // 0x8f3 + None, // 0x900 + None, // 0x901 + None, // 0x902 + None, // 0x903 + None, // 0x910 + None, // 0x911 + None, // 0x912 + None, // 0x913 + None, // 0x920 + None, // 0x921 + None, // 0x922 + None, // 0x923 + None, // 0x930 + None, // 0x931 + None, // 0x932 + None, // 0x933 + None, // 0x940 + None, // 0x941 + None, // 0x942 + None, // 0x943 + None, // 0x950 + None, // 0x951 + None, // 0x952 + None, // 0x953 + None, // 
0x960 + None, // 0x961 + None, // 0x962 + None, // 0x963 + None, // 0x970 + None, // 0x971 + None, // 0x972 + None, // 0x973 + None, // 0x980 + None, // 0x981 + None, // 0x982 + None, // 0x983 + None, // 0x990 + None, // 0x991 + None, // 0x992 + None, // 0x993 + None, // 0x9a0 + None, // 0x9a1 + None, // 0x9a2 + None, // 0x9a3 + None, // 0x9b0 + None, // 0x9b1 + None, // 0x9b2 + None, // 0x9b3 + None, // 0x9c0 + None, // 0x9c1 + None, // 0x9c2 + None, // 0x9c3 + None, // 0x9d0 + None, // 0x9d1 + None, // 0x9d2 + None, // 0x9d3 + None, // 0x9e0 + None, // 0x9e1 + None, // 0x9e2 + None, // 0x9e3 + None, // 0x9f0 + None, // 0x9f1 + None, // 0x9f2 + None, // 0x9f3 + None, // 0xa00 + None, // 0xa01 + None, // 0xa02 + None, // 0xa03 + None, // 0xa10 + None, // 0xa11 + None, // 0xa12 + None, // 0xa13 + None, // 0xa20 + None, // 0xa21 + None, // 0xa22 + None, // 0xa23 + None, // 0xa30 + None, // 0xa31 + None, // 0xa32 + None, // 0xa33 + None, // 0xa40 + None, // 0xa41 + None, // 0xa42 + None, // 0xa43 + M1st_W_M8B_or_M4B, // 0xa50 shld + M1st_W_M8B_or_M2B, // 0xa51 shld + None, // 0xa52 + None, // 0xa53 + None, // 0xa60 + None, // 0xa61 + None, // 0xa62 + None, // 0xa63 + None, // 0xa70 + None, // 0xa71 + None, // 0xa72 + None, // 0xa73 + None, // 0xa80 + None, // 0xa81 + None, // 0xa82 + None, // 0xa83 + None, // 0xa90 + None, // 0xa91 + None, // 0xa92 + None, // 0xa93 + None, // 0xaa0 + None, // 0xaa1 + None, // 0xaa2 + None, // 0xaa3 + None, // 0xab0 + None, // 0xab1 + None, // 0xab2 + None, // 0xab3 + None, // 0xac0 + None, // 0xac1 + None, // 0xac2 + None, // 0xac3 + M1st_W_M8B_or_M4B, // 0xad0 shrd + M1st_W_M8B_or_M2B, // 0xad1 shrd + None, // 0xad2 + None, // 0xad3 + None, // 0xae0 + None, // 0xae1 + None, // 0xae2 + None, // 0xae3 + MOp_W_M8B_or_M4B, // 0xaf0 imul + MOp_W_M8B_or_M2B, // 0xaf1 imul + None, // 0xaf2 + None, // 0xaf3 + None, // 0xb00 + None, // 0xb01 + None, // 0xb02 + None, // 0xb03 + None, // 0xb10 + None, // 0xb11 + None, // 0xb12 + None, // 0xb13 
+ None, // 0xb20 + None, // 0xb21 + None, // 0xb22 + None, // 0xb23 + None, // 0xb30 + None, // 0xb31 + None, // 0xb32 + None, // 0xb33 + None, // 0xb40 + None, // 0xb41 + None, // 0xb42 + None, // 0xb43 + None, // 0xb50 + None, // 0xb51 + None, // 0xb52 + None, // 0xb53 + None, // 0xb60 + None, // 0xb61 + None, // 0xb62 + None, // 0xb63 + None, // 0xb70 + None, // 0xb71 + None, // 0xb72 + None, // 0xb73 + None, // 0xb80 + None, // 0xb81 + None, // 0xb82 + None, // 0xb83 + None, // 0xb90 + None, // 0xb91 + None, // 0xb92 + None, // 0xb93 + None, // 0xba0 + None, // 0xba1 + None, // 0xba2 + None, // 0xba3 + None, // 0xbb0 + None, // 0xbb1 + None, // 0xbb2 + None, // 0xbb3 + None, // 0xbc0 + None, // 0xbc1 + None, // 0xbc2 + None, // 0xbc3 + None, // 0xbd0 + None, // 0xbd1 + None, // 0xbd2 + None, // 0xbd3 + None, // 0xbe0 + None, // 0xbe1 + None, // 0xbe2 + None, // 0xbe3 + None, // 0xbf0 + None, // 0xbf1 + None, // 0xbf2 + None, // 0xbf3 + M1st_M1B_I1B, // 0xc00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc01 + None, // 0xc02 + None, // 0xc03 + M1st_I1B_W_M8B_or_M4B, // 0xc10 rcl,rcr,rol,ror,sar,shl,shr + M1st_I1B_W_M8B_or_M2B, // 0xc11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xc12 + None, // 0xc13 + None, // 0xc20 + None, // 0xc21 + None, // 0xc22 + None, // 0xc23 + None, // 0xc30 + None, // 0xc31 + None, // 0xc32 + None, // 0xc33 + None, // 0xc40 + None, // 0xc41 + None, // 0xc42 + None, // 0xc43 + None, // 0xc50 + None, // 0xc51 + None, // 0xc52 + None, // 0xc53 + None, // 0xc60 + None, // 0xc61 + None, // 0xc62 + None, // 0xc63 + None, // 0xc70 + None, // 0xc71 + None, // 0xc72 + None, // 0xc73 + None, // 0xc80 + None, // 0xc81 + None, // 0xc82 + None, // 0xc83 + None, // 0xc90 + None, // 0xc91 + None, // 0xc92 + None, // 0xc93 + None, // 0xca0 + None, // 0xca1 + None, // 0xca2 + None, // 0xca3 + None, // 0xcb0 + None, // 0xcb1 + None, // 0xcb2 + None, // 0xcb3 + None, // 0xcc0 + None, // 0xcc1 + None, // 0xcc2 + None, // 0xcc3 + None, // 0xcd0 + None, // 0xcd1 + 
None, // 0xcd2 + None, // 0xcd3 + None, // 0xce0 + None, // 0xce1 + None, // 0xce2 + None, // 0xce3 + None, // 0xcf0 + None, // 0xcf1 + None, // 0xcf2 + None, // 0xcf3 + M1st_M1B, // 0xd00 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd01 + None, // 0xd02 + None, // 0xd03 + M1st_W_M8B_or_M4B, // 0xd10 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd11 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd12 + None, // 0xd13 + M1st_M1B, // 0xd20 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd21 + None, // 0xd22 + None, // 0xd23 + M1st_W_M8B_or_M4B, // 0xd30 rcl,rcr,rol,ror,sar,shl,shr + M1st_W_M8B_or_M2B, // 0xd31 rcl,rcr,rol,ror,sar,shl,shr + None, // 0xd32 + None, // 0xd33 + MOp_M16B_I1B, // 0xd40 sha1rnds4 + None, // 0xd41 + None, // 0xd42 + None, // 0xd43 + None, // 0xd50 + None, // 0xd51 + None, // 0xd52 + None, // 0xd53 + None, // 0xd60 + None, // 0xd61 + None, // 0xd62 + None, // 0xd63 + None, // 0xd70 + None, // 0xd71 + None, // 0xd72 + None, // 0xd73 + MOp_M16B, // 0xd80 sha1nexte + None, // 0xd81 + MOnly_MUnknown, // 0xd82 aesdecwide128kl,aesdecwide256kl,aesencwide128kl,aesencwide256kl + None, // 0xd83 + MOp_M16B, // 0xd90 sha1msg1 + None, // 0xd91 + None, // 0xd92 + None, // 0xd93 + MOp_M16B, // 0xda0 sha1msg2 + None, // 0xda1 + None, // 0xda2 + None, // 0xda3 + MOp_M16B, // 0xdb0 sha256rnds2 + None, // 0xdb1 + None, // 0xdb2 + None, // 0xdb3 + MOp_M16B, // 0xdc0 sha256msg1 + None, // 0xdc1 + MOp_MUnknown, // 0xdc2 aesenc128kl + None, // 0xdc3 + MOp_M16B, // 0xdd0 sha256msg2 + None, // 0xdd1 + MOp_MUnknown, // 0xdd2 aesdec128kl + None, // 0xdd3 + None, // 0xde0 + None, // 0xde1 + MOp_MUnknown, // 0xde2 aesenc256kl + None, // 0xde3 + None, // 0xdf0 + None, // 0xdf1 + MOp_MUnknown, // 0xdf2 aesdec256kl + None, // 0xdf3 + None, // 0xe00 + None, // 0xe01 + None, // 0xe02 + None, // 0xe03 + None, // 0xe10 + None, // 0xe11 + None, // 0xe12 + None, // 0xe13 + None, // 0xe20 + None, // 0xe21 + None, // 0xe22 + None, // 0xe23 + None, // 0xe30 + None, // 0xe31 + None, // 0xe32 + 
None, // 0xe33 + None, // 0xe40 + None, // 0xe41 + None, // 0xe42 + None, // 0xe43 + None, // 0xe50 + None, // 0xe51 + None, // 0xe52 + None, // 0xe53 + None, // 0xe60 + None, // 0xe61 + None, // 0xe62 + None, // 0xe63 + None, // 0xe70 + None, // 0xe71 + None, // 0xe72 + None, // 0xe73 + None, // 0xe80 + None, // 0xe81 + None, // 0xe82 + None, // 0xe83 + None, // 0xe90 + None, // 0xe91 + None, // 0xe92 + None, // 0xe93 + None, // 0xea0 + None, // 0xea1 + None, // 0xea2 + None, // 0xea3 + None, // 0xeb0 + None, // 0xeb1 + None, // 0xeb2 + None, // 0xeb3 + None, // 0xec0 + None, // 0xec1 + None, // 0xec2 + None, // 0xec3 + None, // 0xed0 + None, // 0xed1 + None, // 0xed2 + None, // 0xed3 + None, // 0xee0 + None, // 0xee1 + None, // 0xee2 + None, // 0xee3 + None, // 0xef0 + None, // 0xef1 + None, // 0xef2 + None, // 0xef3 + MOp_M1B, // 0xf00 crc32 + None, // 0xf01 + MOp_M16B, // 0xf02 invept + None, // 0xf03 + MOp_W_M8B_or_M4B, // 0xf10 crc32 + MOp_W_M8B_or_M2B, // 0xf11 crc32 + MOp_M16B, // 0xf12 invvpid + None, // 0xf13 + None, // 0xf20 + None, // 0xf21 + MOp_MUnknown, // 0xf22 invpcid + None, // 0xf23 + None, // 0xf30 + None, // 0xf31 + None, // 0xf32 + None, // 0xf33 + None, // 0xf40 + None, // 0xf41 + None, // 0xf42 + None, // 0xf43 + None, // 0xf50 + None, // 0xf51 + None, // 0xf52 + None, // 0xf53 + MOnly_M1B, // 0xf60 neg,not + None, // 0xf61 + None, // 0xf62 + None, // 0xf63 + MOnly_W_M8B_or_M4B, // 0xf70 neg,not + MOnly_W_M8B_or_M2B, // 0xf71 neg,not + None, // 0xf72 + None, // 0xf73 + None, // 0xf80 + MOp_MUnknown, // 0xf81 movdir64b + MOp_MUnknown, // 0xf82 enqcmds + MOp_MUnknown, // 0xf83 enqcmd + M1st_W_M8B_or_M4B, // 0xf90 movdiri + None, // 0xf91 + None, // 0xf92 + None, // 0xf93 + None, // 0xfa0 + None, // 0xfa1 + None, // 0xfa2 + None, // 0xfa3 + None, // 0xfb0 + None, // 0xfb1 + None, // 0xfb2 + None, // 0xfb3 + M1st_W_M8B_or_M4B, // 0xfc0 aadd + M1st_W_M8B_or_M4B, // 0xfc1 aand + M1st_W_M8B_or_M4B, // 0xfc2 axor + M1st_W_M8B_or_M4B, // 0xfc3 aor + 
None, // 0xfd0 + None, // 0xfd1 + None, // 0xfd2 + None, // 0xfd3 + MOnly_M1B, // 0xfe0 dec,inc + None, // 0xfe1 + None, // 0xfe2 + None, // 0xfe3 + MOnly_W_M8B_or_M4B, // 0xff0 dec,inc + None, // 0xff1 + None, // 0xff2 + None, // 0xff3 + }; } diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs index dbcec26dd67583..0e55c4c2d4fac3 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/Amd64InstructionTableGenerator.cs @@ -23,9 +23,8 @@ public enum EncodingFlags : int P = 0x1, // OpSize (P)refix F2 = 0x2, F3 = 0x4, - Rex = 0x8, - W = 0x10, // VEX.W / EVEX.W + W = 0x10, // REX.W / REX2.W / VEX.W / EVEX.W L = 0x20, // VEX.L (for EVEX, see LL bits below) b = 0x40, // EVEX.b (broadcast/RC/SAE Context) @@ -102,21 +101,29 @@ internal enum Map { // Map None, - Primary, - Secondary, - F38, - F3A, + Primary, // legacy map 0 + Secondary, // 0F - legacy map 1 + F38, // 0F 38 - legacy map 2 + F3A, // 0F 3A - legacy map 3 Vex1, // mmmmm = 00001 (0F) Vex2, // mmmmm = 00010 (0F 38) Vex3, // mmmmm = 00011 (0F 3A) Evex_0F, // mmm = 001 Evex_0F38, // mmm = 010 Evex_0F3A, // mmm = 011 + Evex_4, // mmm = 100 // Extended EVEX legacy promoted map 0/1 } internal sealed partial class Amd64InstructionSample { - [GeneratedRegex(@"^\s*(?
0x[a-f0-9]+)\s[^:]*:\s*(?[0-9a-f ]*)\t(?(((rex[.WRXB]*)|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|([cdefgs]s)) +)*)(?\S+) *(?(\S[^#]*?)?)\s*(?#.*)?$", + [GeneratedRegex( + @"^\s*" + + @"(?
0x[0-9a-fA-F]+)\s[^:]*:\s*" + + @"(?[0-9a-fA-F][0-9a-fA-F]( [0-9a-fA-F][0-9a-fA-F])*)\s*" + + @"(?(((rex[.WRXB]*)|(\{rex2 0x[0-9a-fA-F][0-9a-fA-F]?\})|(rep[nez]*)|(data16)|(addr32)|(lock)|(bnd)|(\{vex\})|(\{evex\})|([cdefgs]s)) +)*)" + + @"(?\S+) *" + + @"(?(\S[^#]*?)?)\s*" + + @"(?#.*)?$", RegexOptions.ExplicitCapture)] private static partial Regex EncDisassemblySplit(); @@ -139,15 +146,28 @@ internal sealed partial class Amd64InstructionSample ["WORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, ["WORD PTR [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M2B, + ["WORD BCST [rip+0x53525150]{1to32}"] = SuffixFlags.M2B, ["DWORD PTR [rip+0x53525150]"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, ["DWORD PTR [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M4B, + ["DWORD BCST [rip+0x53525150]{1to16}"] = SuffixFlags.M4B, ["QWORD PTR [rip+0x53525150]"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, ["QWORD PTR [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to2}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to4}"] = SuffixFlags.M8B, + ["QWORD BCST [rip+0x53525150]{1to8}"] = SuffixFlags.M8B, ["OWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["XMMWORD PTR [rip+0x53525150]"] = SuffixFlags.M16B, ["YMMWORD PTR [rip+0x53525150]"] = 
SuffixFlags.M32B, @@ -181,7 +201,8 @@ public int opCodeExt { get { - const byte BytePP = 0x3; + const byte VEX_pp_mask = 0x3; + const byte EVEX_pp_mask = 0x3; byte opcode = encoding[opIndex]; byte pp = 0; @@ -203,14 +224,15 @@ public int opCodeExt case Map.Vex2: case Map.Vex3: // `pp` is the low 2 bits of the last byte of the VEX prefix (either 3-byte or 2-byte form). - pp = (byte)(encoding[opIndex - 1] & BytePP); + pp = (byte)(encoding[opIndex - 1] & VEX_pp_mask); break; case Map.Evex_0F: case Map.Evex_0F38: case Map.Evex_0F3A: + case Map.Evex_4: { var evex_p1 = encoding[opIndex - 2]; - pp = (byte)(evex_p1 & BytePP); + pp = (byte)(evex_p1 & EVEX_pp_mask); break; } default: @@ -307,6 +329,7 @@ internal enum Prefixes : byte AddSize = 0x67, Vex = 0xc4, VexShort = 0xc5, + Rex2 = 0xD5, Lock = 0xf0, Rep = 0xf2, Repne = 0xf3 @@ -321,6 +344,8 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) const byte RexMask = 0xf0; const byte RexW = 0x8; + const byte Rex2W = 0x8; + const byte Rex2_M0 = 0x80; const byte Vex_ByteW = 0x80; const byte Vex_ByteL = 0x04; @@ -365,7 +390,6 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) { byte rex = encoding[operandIndex++]; - flags |= EncodingFlags.Rex; if (Debug.debug) Console.WriteLine($" P:REX"); if ((rex & RexW) != 0) @@ -477,6 +501,11 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) map = Map.Evex_0F3A; if (Debug.debug) Console.WriteLine($" map: Evex_0F3A"); break; + case 0x4: + map = Map.Evex_4; + // Extended EVEX legacy promoted map 0/1. 
+ if (Debug.debug) Console.WriteLine($" map: Evex_4"); + break; default: throw new Exception($"Unexpected EVEX map {encoding}"); } @@ -487,23 +516,55 @@ private static (Map, byte, EncodingFlags) parsePrefix(List encoding) if (Debug.debug) Console.WriteLine($" EVEX.W"); } - byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); - flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); - if (Debug.debug) + if (evex_mmm != 4) // EVEX.L'L is not used in map 4 { - Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + byte evex_LprimeL = (byte)((evex_p2 & Evex_ByteLprimeLmask) >> Evex_ByteLprimeLshift); + flags |= Util.ConvertEvexLLToEncodingFlags(evex_LprimeL); + if (Debug.debug) + { + Console.WriteLine($" EVEX.L'L={evex_LprimeL:x1}"); + } } - var evex_b = evex_p2 & 0x10; - if (evex_b != 0) + if (evex_mmm != 4) // EVEX.b is not used in map 4 { - flags |= EncodingFlags.b; - if (Debug.debug) Console.WriteLine($" EVEX.b"); + var evex_b = evex_p2 & 0x10; + if (evex_b != 0) + { + flags |= EncodingFlags.b; + if (Debug.debug) Console.WriteLine($" EVEX.b"); + } } operandIndex += 4; break; } + case Prefixes.Rex2: + { + if (Debug.debug) Console.WriteLine($" P:REX2"); + var byte1 = encoding[operandIndex + 1]; + + var rex2_m0 = byte1 & Rex2_M0; + if (rex2_m0 == 0) + { + map = Map.Primary; + if (Debug.debug) Console.WriteLine($" map: Primary"); + } + else + { + map = Map.Secondary; + if (Debug.debug) Console.WriteLine($" map: Secondary"); + } + + if ((byte1 & Rex2W) != 0) + { + flags |= EncodingFlags.W; + if (Debug.debug) Console.WriteLine($" P:REX2.W"); + } + + operandIndex += 2; + break; + } default: map = Map.Primary; if (Debug.debug) Console.WriteLine($" map: primary"); @@ -599,11 +660,19 @@ internal sealed partial class Amd64InstructionTableGenerator { private List samples = new List(); - private const string assemblyPrefix = " 0x000000000"; - private const string preTerminator = "58\t"; - private const string groupTerminator = "59\tpop"; + 
[GeneratedRegex(@"^\s+0x00000000")] + private static partial Regex AssemblyPrefix(); + + // The '0x' prefix is not included in the regex match. + [GeneratedRegex(@"^\s*0x(?<address>[0-9a-fA-F]+)", RegexOptions.ExplicitCapture)] + private static partial Regex AssemblyAddress(); - [GeneratedRegex(@"((\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] + // NOTE: APX instructions push2/push2p/pop2/pop2p are not causing gdb to report an illegal instruction, + // which is causing problems. So manually disallow them. + // NOTE: we don't disqualify disassembly with `{evex}` in the text: there are some cases where an instruction + // can be encoded with either an EVEX or VEX encoding, and the disassembler will annotate the instruction with + // `{evex}` to indicate it is not the canonical encoding. + [GeneratedRegex(@"((push2)|(pop2)|(\{vex\})|(\{bad\})|(\(bad\))|(\srex(\.[WRXB]*)?\s*(#.*)?$))")] private static partial Regex BadDisassembly(); private List<(Map, int)> regExpandOpcodes; @@ -648,6 +717,7 @@ private Amd64InstructionTableGenerator() { Map.Evex_0F, new Dictionary() }, { Map.Evex_0F38, new Dictionary() }, { Map.Evex_0F3A, new Dictionary() }, + { Map.Evex_4, new Dictionary() }, }; ParseSamples(); @@ -658,32 +728,34 @@ private void ParseSamples() { string line; string sample = null; - bool saw58 = false; + int sampleAddress = 0; + + // Each sample is written out as 16 bytes of disassembly. If we hit bad disassembly, we need to skip to the next sample + // based on the disassembly address. 
+ while ((line = Console.In.ReadLine()) != null) { //if (Debug.debug) Console.WriteLine($"line: {line}"); - if (sample == null) + var match = AssemblyAddress().Match(line); + if (!match.Success) { - // Ignore non-assembly lines - if (line.StartsWith(assemblyPrefix)) - sample = line.Trim(); continue; } + int lineAddress = int.Parse(match.Groups["address"].Value, NumberStyles.AllowHexSpecifier); - //if (Debug.debug) Console.WriteLine($"sample: {sample}"); - - // Each sample may contain multiple instructions - // We are only interested in the first of each group - // Each group is terminated by 0x58 then 0x59 which is a pop instruction - if (!saw58) + if (sample == null) { - saw58 = line.Contains(preTerminator); + sample = line.Trim(); + sampleAddress = lineAddress; + //if (Debug.debug) Console.WriteLine($"sample: ({sampleAddress:x}) {sample}"); continue; } - else if (!line.Contains(groupTerminator)) + + // Keep skipping instructions until we get to the next sample address. + if (lineAddress < sampleAddress + 15) { - saw58 = false; + //if (Debug.debug) Console.WriteLine($"Skipping {lineAddress:x}"); continue; } @@ -712,8 +784,8 @@ private void ParseSamples() } } - saw58 = false; sample = null; + sampleAddress = 0; } } @@ -947,13 +1019,22 @@ private void SummarizeSamples(bool reg) else goto default; break; + case SuffixFlags.M8B | SuffixFlags.M2B | SuffixFlags.I4B | SuffixFlags.I2B: + if (TestHypothesis((e) => Amd64W(SuffixFlags.M8B | SuffixFlags.I4B, SuffixFlags.M2B | SuffixFlags.I2B, e), sometimesSuffix, map)) + rules += "_W_M8B_I4B_or_M2B_I2B"; + else + goto default; + break; default: - if (Debug.debug) { - Console.WriteLine($"Unhandled rule...{sometimesSuffix}"); + string mnemonics_string = string.Join(",", mnemonics.OrderBy(s => s)); + if (Debug.debug) + { + Console.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + } + Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix} : {mnemonics_string}"); + return; } - 
Console.Error.WriteLine($"Unhandled rule...{sometimesSuffix}"); - return; } rules = rules.Replace("^_", "").Replace("^", "None"); @@ -1003,12 +1084,12 @@ public static SuffixFlags TestLL(SuffixFlags LL00, SuffixFlags LL01, SuffixFlags public static SuffixFlags Amd64L(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.L, t, f, g); public static SuffixFlags Amd64W(SuffixFlags W1, SuffixFlags W0, EncodingFlags g) => Test(EncodingFlags.W, W1, W0, g); - public static SuffixFlags Amd64P(SuffixFlags t, SuffixFlags f, EncodingFlags g) => Test(EncodingFlags.P, f, t, g); + public static SuffixFlags Amd64P(SuffixFlags P0, SuffixFlags P1, EncodingFlags g) => Test(EncodingFlags.P, P1, P0, g); // Note: P0/P1 reversed. This puts smaller (OSIZE override) second. public static SuffixFlags Amd64b(SuffixFlags b1, SuffixFlags b0, EncodingFlags g) => Test(EncodingFlags.b, b1, b0, g); // Tests for multiple flags - public static SuffixFlags Amd64WP(SuffixFlags tx, SuffixFlags ft, SuffixFlags ff, EncodingFlags g) => Amd64W(tx, Amd64P(ft, ff, g), g); + public static SuffixFlags Amd64WP(SuffixFlags W1, SuffixFlags W0P0, SuffixFlags W0P1, EncodingFlags g) => Amd64W(W1, Amd64P(W0P0, W0P1, g), g); public static SuffixFlags Amd64WLL(SuffixFlags W1LL00, SuffixFlags W1LL01, SuffixFlags W1LL10, SuffixFlags W0LL00, SuffixFlags W0LL01, SuffixFlags W0LL10, EncodingFlags g) => Amd64W(TestLL(W1LL00, W1LL01, W1LL10, g), TestLL(W0LL00, W0LL01, W0LL10, g), g); public static SuffixFlags Amd64bLL(SuffixFlags b1, SuffixFlags b0LL00, SuffixFlags b0LL01, SuffixFlags b0LL10, EncodingFlags g) => @@ -1035,7 +1116,7 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru else { string oldstring = null; - if (Debug.debug) + if (true) // Debug.debug { if (opcodes[map].TryGetValue(opCodeExt, out oldstring)) { @@ -1046,6 +1127,9 @@ private void AddOpCode(Map map, int opCodeExt, bool reg, int modrmReg, string ru if (Debug.debug) { Console.WriteLine($"add 
opcodes[{map}][{opCodeExt:x3}] = {opcodes[map][opCodeExt]}"); + } + if (true) // Debug.debug + { if ((oldstring != null) && (oldstring != opcodes[map][opCodeExt])) { Console.WriteLine($"WARNING: REPLACEMENT WAS DIFFERENT"); @@ -1090,12 +1174,21 @@ private void WriteCode() Console.WriteLine(" // I4B // Instruction includes 4 bytes of immediates"); Console.WriteLine(" // I8B // Instruction includes 8 bytes of immediates"); Console.WriteLine(" // Unknown // Instruction samples did not include a modrm configured to produce RIP addressing"); - Console.WriteLine(" // L // Flags depend on L bit in encoding. L__or_"); - Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding. LL___"); - Console.WriteLine(" // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); - Console.WriteLine(" // W // Flags depend on W bit in encoding. W__or_"); - Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding. P__or_"); - Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix. 
WP__or__or_"); + Console.WriteLine(" // L // Flags depend on L bit in encoding."); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // // L__or_"); + Console.WriteLine(" // LL // Flags depend on L'L bits in EVEX encoding."); + Console.WriteLine(" // // LL___"); + Console.WriteLine(" // // LL00 = 128-bit vector; LL01 = 256-bit vector; LL10 = 512-bit vector"); + Console.WriteLine(" // W // Flags depend on W bit in encoding."); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // // W__or_"); + Console.WriteLine(" // P // Flags depend on OpSize prefix for encoding."); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // // P__or_"); + Console.WriteLine(" // WP // Flags depend on W bit in encoding and OpSize prefix."); + Console.WriteLine(" // // WP__or__or_"); + Console.WriteLine(" // // WP__or__or_"); Console.WriteLine(" // WLL // Flags depend on W and L'L bits."); Console.WriteLine(" // // WLL____or___"); Console.WriteLine(" // bLL // Flags depend on EVEX.b and L'L bits."); @@ -1114,15 +1207,15 @@ private void WriteCode() continue; Console.WriteLine($" {rule},"); } - Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location in encoded in lower bits"); + Console.WriteLine($" Extension = 0x80, // The instruction encoding form depends on the modrm.reg field. Extension table location is encoded in lower bits."); Console.WriteLine(" };"); Console.WriteLine(); - Console.WriteLine(" // The following instrForm maps correspond to the amd64 instr maps"); - Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics"); - Console.WriteLine(" // The opcode is packed to be human readable. 
PackedOpcode = opcode << 4 + pp"); - Console.WriteLine(" // - For Vex* the pp is directly included in the encoding"); - Console.WriteLine(" // - For the Secondary, F38, and F3A pages the pp is not defined in the encoding, but affects instr form."); + Console.WriteLine(" // The following instrForm maps correspond to the amd64 instruction maps."); + Console.WriteLine(" // The comments are for debugging convenience. The comments use a packed opcode followed by a list of observed mnemonics."); + Console.WriteLine(" // The opcode is packed to be human readable. PackedOpcode = opcode << 4 + pp. For example, 0x123 is opcode 0x12, pp=0x3."); + Console.WriteLine(" // - For Vex* and EVEX the pp is directly included in the encoding"); + Console.WriteLine(" // - For the Secondary (0F), 0F 38, and 0F 3A pages the pp is not defined in the encoding, but affects instruction form."); Console.WriteLine(" // - pp = 0 implies no prefix."); Console.WriteLine(" // - pp = 1 implies 0x66 OpSize prefix only."); Console.WriteLine(" // - pp = 2 implies 0xF3 prefix."); @@ -1130,9 +1223,9 @@ private void WriteCode() Console.WriteLine(" // - For the primary map, pp is not used and is always 0 in the comments."); Console.WriteLine(); Console.WriteLine(); - Console.WriteLine(" // Instruction which change forms based on modrm.reg are encoded in this extension table."); - Console.WriteLine(" // Since there are 8 modrm.reg values, they occur is groups of 8."); - Console.WriteLine(" // Each group is referenced from the other tables below using Extension|(index >> 3)."); + Console.WriteLine(" // Instructions which change forms based on modrm.reg are encoded in this extension table."); + Console.WriteLine(" // Since there are 8 modrm.reg values, they occur in groups of 8."); + Console.WriteLine(" // Each group is referenced from the other tables below using (Extension|(index >> 3))."); currentExtension += 8; Console.WriteLine($" static const InstrForm instrFormExtension[{currentExtension + 1}]"); 
Console.WriteLine(" {"); @@ -1167,7 +1260,8 @@ private void WriteCode() ("Vex3", Map.Vex3), ("Evex_0F", Map.Evex_0F), ("Evex_0F38", Map.Evex_0F38), - ("Evex_0F3A", Map.Evex_0F3A) + ("Evex_0F3A", Map.Evex_0F3A), + ("Evex_4", Map.Evex_4) }; foreach ((string name, Map map) in mapTuples) diff --git a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp index 963401812ca819..abaaa165af2555 100644 --- a/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp +++ b/src/coreclr/debug/ee/amd64/gen_amd64InstrDecode/createOpcodes.cpp @@ -2,19 +2,89 @@ // The .NET Foundation licenses this file to you under the MIT license. #include - + #define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0])) +void generatePostamble(int bytesEmitted) +{ + // We need a postamble of single-byte instructions so the disassembler can get back on track + // after a bad instruction. We always pad up to 16 bytes total codes: the maximum x86 instruction + // size is 15, so the disassembler will find at worst a 15 byte instruction followed by a single byte + // padding instruction. The minimum byte sequence we generate below is a single opcode plus a modrm, + // so we need 14 possible postamble/padding bytes. + const char* postamble[] = { + "0x50, ", + "0x51, ", + "0x52, ", + "0x53, ", + "0x54, ", + "0x55, ", + "0x56, ", + "0x57, ", + "0x58, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, ", + "0x59, " + }; + + int bytesToEmit = 16 - bytesEmitted; + for (int i = 0; i < bytesToEmit; i++) + { + printf("%s", postamble[i]); + } + printf("\n"); +} + int main(int argc, char* argv[]) { printf("#include \n"); printf("#include \n"); - const char* postamble = "0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,\n"; + // The sequence of generated codes is important: the tool which reads the disassembled instructions + // processes one "opcode" at a time. 
When the opcode changes, it summarizes the opcode and moves on + // to process the next one. Here, "opcode" means a single instruction. In the x64 encoding, this + // can be determined by the primary opcode byte, the prefix (0x66, 0xF2, 0xF3) or equivalent "pp" field + // in the VEX/EVEX prefix, and the "reg/opcode" field of the ModRM byte, which sometimes provides additional + // "opcode" bits. + // + // When generating codes, for every primary opcode, we output the following ModRM bytes: + // 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d + // this corresponds to modrm.rm=0x5 and modrm.reg=0,1,2,3,4,5,6,7. That is, all possible modrm.reg values. + // modrm.mod=0/modrm.rm=0x5 corresponds to RIP-relative addressing. The purpose of varying modrm.reg + // is to find all cases where an instruction encoding depends on modrm.reg. + // + // Thus, the 'modrm' loop needs to be less nested than the opcode/prefix loop, since varying modrm + // can change the "instruction". + // + // Note: it might be more robust to not have this ordering restriction but that would require the + // processing tool to save all in-progress calculations, for all instructions -- perhaps using a + // lot of memory? printf("uint8_t opcodes[] = {\n"); - printf("// Primary Opcode\n"); + struct byteSequence { + const char* string; + int numBytes; + }; + + // Opcodes in legacy map 0 don't change the instruction based on the 0x66 prefix (unlike in + // other maps), so the 0x66 prefix can vary inside (in a more nested loop) the modrm loop. 
+ const byteSequence legacyMap0PrefixStrings[] = { + { "", 0 }, + { "0x66, ", 1 }, // Operand size prefix 0x66 + { "0x40, ", 1 }, // REX + { "0x66, 0x40, ", 2 }, // Operand size prefix 0x66 + REX + { "0x4F, ", 1 }, // REX.WRXB + { "0x66, 0x4F, ", 2 }, // Operand size prefix 0x66 + REX.WRXB + { "0xD5, 0x00, ", 2 }, // REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0x66, 0xD5, 0x00, ", 3 }, // Operand size prefix 0x66 + REX2.M0=0.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0x7F, ", 2 }, // REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + { "0x66, 0xD5, 0x7F, ", 3 } // Operand size prefix 0x66 + REX2.M0=0.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Primary Opcode (legacy map 0)\n"); for (int i = 0; i < 256; ++i) { switch(i) @@ -47,6 +117,7 @@ int main(int argc, char* argv[]) case 0x67: // AddrSize case 0xc4: // Vex 3 Byte case 0xc5: // Vex 2 Byte + case 0xd5: // REX2 case 0xf0: // Lock case 0xf2: // Repne case 0xf3: // Rep @@ -54,24 +125,36 @@ int main(int argc, char* argv[]) default: break; } + for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX - printf( "0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x40, 0x%02x, 0x%02x, %s", i, modrm, postamble); - // REX.WRXB - printf( "0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); - printf( "0x66, 0x4f, 0x%02x, 0x%02x, %s", i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap0PrefixStrings); ++prefixNum) + { + printf("%s0x%02x, 0x%02x, ", legacyMap0PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(legacyMap0PrefixStrings[prefixNum].numBytes + 2); + } } printf("\n"); } // `66 F2` is only used for `0F 38 F*` ("row F") - const char* const ppString[] = {"", "0x66, ", "0xf3, ", "0xf2, ", "0x66, 0xf2, "}; + const byteSequence ppString[] = { + { "", 0 }, + { "0x66, ", 1 }, + { "0xf3, ", 1 }, + { "0xf2, ", 1 }, + { "0x66, 0xf2, ", 2 } + }; - 
printf("// Secondary Opcode\n"); + const byteSequence legacyMap1PrefixStrings[] = { + { "0x0F, ", 1 }, // Escape prefix + { "0x40, 0x0F, ", 2 }, // REX + { "0x4F, 0x0F, ", 2 }, // REX.WRXB + { "0xD5, 0x80, ", 2 }, // REX2.M0=1.R4=0.X4=0.B4=0.W=0.R3=0.X3=0.B3=0 + { "0xD5, 0xFF, ", 2 } // REX2.M0=1.R4=1.X4=1.B4=1.W=1.R3=1.X3=1.B3=1 + }; + + printf("// Secondary Opcode: 0F (legacy map 1)\n"); for (int i = 0; i < 256; ++i) { if (i == 0x38) // extension: 0F 38 @@ -83,17 +166,23 @@ int main(int argc, char* argv[]) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "0x40, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "0x4f, %s0x0f, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap1PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap1PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap1PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 38\n"); + const byteSequence legacyMap2PrefixStrings[] = { + { "0x0F, 0x38, ", 2 }, + { "0x40, 0x0F, 0x38, ", 3 }, // REX + { "0x4F, 0x0F, 0x38, ", 3 } // REX.WRXB + }; + + printf("// 0F 38 (legacy map 2)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 5; ++pp) @@ -104,28 +193,34 @@ int main(int argc, char* argv[]) for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x38, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap2PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, 
legacyMap2PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap2PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); } - printf("// 0F 3A\n"); + const byteSequence legacyMap3PrefixStrings[] = { + { "0x0F, 0x3A, ", 2 }, + { "0x40, 0x0F, 0x3A, ", 3 }, // REX + { "0x4F, 0x0F, 0x3A, ", 3 } // REX.WRXB + }; + + printf("// 0F 3A (legacy map 3)\n"); for (int i = 0; i < 256; ++i) { for (int pp = 0; pp < 2; ++pp) // only 66 prefix is used (no F3, F2) (F2 is used in VEX 0F 3A) { for (int modrm = 0x5; modrm < 64; modrm += 8) { - printf( "%s0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX - printf( "%s0x40, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); - // REX.WRXB - printf( "%s0x4f, 0x0f, 0x3A, 0x%02x, 0x%02x, %s", ppString[pp], i, modrm, postamble); + for (int prefixNum = 0; prefixNum < ARRAYSIZE(legacyMap3PrefixStrings); ++prefixNum) + { + printf("%s%s0x%02x, 0x%02x, ", ppString[pp].string, legacyMap3PrefixStrings[prefixNum].string, i, modrm); + generatePostamble(ppString[pp].numBytes + legacyMap3PrefixStrings[prefixNum].numBytes + 2); + } } } printf("\n"); @@ -157,7 +252,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe1, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -173,7 +269,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, postamble); + printf("0xc4, 0xe2, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -189,7 +286,8 @@ int main(int argc, char* argv[]) { for (int c = 0; c < ARRAYSIZE(VexByte2Cases); ++c) { - printf( "0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, %s", pp + VexByte2Cases[c], i, modrm, 
postamble); + printf("0xc4, 0xe3, 0x%02x, 0x%02x, 0x%02x, ", pp + VexByte2Cases[c], i, modrm); + generatePostamble(5); } } } @@ -199,21 +297,30 @@ int main(int argc, char* argv[]) // Interesting cases for the EVEX prefix. Several cases are added below, in the loops, to ensure desired // ordering: // 1. cases of `mmm` (which defines the opcode decoding map) are the outer loops. - // 2. cases of `pp`, next inner loops. - // 3. cases of ModR/M byte, innermost loops. + // 2. one-byte instruction opcode, next inner loops. + // 3. cases of `pp`, next inner loops. + // 4. cases of ModR/M byte, next inner loops. + // 5. various EVEX cases, innermost loops. + // NOTE: 4 & 5 can probably (and possibly should, for consistency with above loops) be swapped. // // In all cases, we have: // P0: // P[3] = P0[3] = 0 // required by specification + // -- For APX, mmm=0b100, P[3] = B4, 0 is ok // EVEX.R'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.R' = EVEX.R4 (inverted) = P[4]. 1 (inverted value) is ok // EVEX.RXB=111 (inverted) + // -- For APX, mmm=0b100, EVEX.RXB (inverted) = EVEX.R3.X3.B3 (inverted), so 111 is ok. // P1: // P[10] = P1[2] = 1 // required by specification + // -- For APX, mmm=0b100, EVEX.X4/1 (inverted) so 1 is ok // P2: - // EVEX.aaa = 0 // opmask register k0 (no masking) - // EVEX.V'=1 (inverted) - // EVEX.b=0 // no broadcast (REVIEW: need to handle broadcast as it changes the size of the memory operand) - // EVEX.z=0 // always merge + // P[18:16] = P2[2:0] = EVEX.aaa = 0 // opmask register k0 (no masking) + // -- For APX, mmm=0b100, P2[0] = P2[1] = 0, P2[2] = NF = 0 (same as non-APX) + // P[19] = P2[3] = EVEX.V'=1 (inverted) + // -- For APX, mmm=0b100, EVEX.V' = EVEX.V4 (inverted), so 1 is ok. + // P[23] = P2[7] = EVEX.z=0 // always merge + // -- For APX, mmm=0b100, P[23] = 0. // // Note that we don't need to consider disp8*N compressed displacement support since that is not used for // RIP-relative addressing, which is all we care about. 
@@ -222,6 +329,10 @@ int main(int argc, char* argv[]) const int evex_p1_base = 0x04; const int evex_p2_base = 0x08; + const int evex_4_p0_base = 0xf0; + const int evex_4_p1_base = 0x7c; + const int evex_4_p2_base = 0x08; + const int evex_w_cases[] = // EVEX.W in P1 { 0, @@ -229,6 +340,8 @@ int main(int argc, char* argv[]) }; const size_t evex_w_cases_size = ARRAYSIZE(evex_w_cases); + // For APX, mmm=0b100, EVEX.vvvv is used to store NDD register if EVEX.ND=1. We never set EVEX.ND=1 + // since it doesn't affect instruction size or RIP-relative memory information. const int evex_vvvv_cases[] = // EVEX.vvvv in P1 { 0, // 0000b (xmm15) @@ -236,6 +349,7 @@ int main(int argc, char* argv[]) }; const size_t evex_vvvv_cases_size = ARRAYSIZE(evex_vvvv_cases); + // For APX, mmm=0b100, P[22:21] = P2[6:5] = EVEX.L'L and must be zero. const int evex_LprimeL_cases[] = // EVEX.L'L in P2 { 0, // 00b = 128-bit vectors @@ -244,6 +358,7 @@ int main(int argc, char* argv[]) }; const size_t evex_LprimeL_cases_size = ARRAYSIZE(evex_LprimeL_cases); + // -- For APX, mmm=0b100, P[20] = P2[4] = EVEX.b = EVEX.ND, so we keep it zero const int evex_b_cases[] = // EVEX.b in P2 { 0, // 0b = no broadcast @@ -251,14 +366,17 @@ int main(int argc, char* argv[]) }; const size_t evex_b_cases_size = ARRAYSIZE(evex_b_cases); - const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_cases = evex_w_cases_size * evex_vvvv_cases_size * evex_LprimeL_cases_size * evex_b_cases_size; + const size_t total_evex_4_cases = evex_w_cases_size; struct EvexBytes { int p0, p1, p2; - } - EvexCases[total_evex_cases]; - + }; + + EvexBytes EvexCases[total_evex_cases]; // cases for mmm=0b001, 0b010, 0b011 + EvexBytes Evex4Cases[total_evex_4_cases]; // cases for mmm=0b100 + size_t evex_case = 0; for (size_t i = 0; i < evex_w_cases_size; i++) { @@ -277,6 +395,15 @@ int main(int argc, char* argv[]) } } + evex_case = 0; + for (size_t i = 0; i 
< evex_w_cases_size; i++) + { + Evex4Cases[evex_case].p0 = evex_4_p0_base; + Evex4Cases[evex_case].p1 = evex_4_p1_base | evex_w_cases[i]; + Evex4Cases[evex_case].p2 = evex_4_p2_base; + ++evex_case; + } + printf("// EVEX: mmm=001 (0F)\n"); for (int i = 0; i < 256; ++i) { @@ -289,7 +416,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x1; // mmm=001 (0F) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -308,7 +436,8 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x2; // mmm=010 (0F 38) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } @@ -327,7 +456,28 @@ int main(int argc, char* argv[]) int evex_p0 = EvexCases[c].p0 | 0x3; // mmm=011 (0F 3A) int evex_p1 = EvexCases[c].p1 | pp; int evex_p2 = EvexCases[c].p2; - printf( "0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, %s", evex_p0, evex_p1, evex_p2, i, modrm, postamble); + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, evex_p1, evex_p2, i, modrm); + generatePostamble(6); + } + } + } + printf("\n"); + } + + printf("// EVEX: mmm=100 (extended EVEX; APX promoted legacy map 0 instructions)\n"); + for (int i = 0; i < 256; ++i) + { + for (int pp = 0; pp < 4; ++pp) + { + for (int modrm = 0x5; modrm < 64; modrm += 8) + { + for (int c = 0; c < ARRAYSIZE(Evex4Cases); ++c) + { + int evex_p0 = Evex4Cases[c].p0 | 0x4; // mmm=100 + int evex_p1 = Evex4Cases[c].p1 | pp; + int evex_p2 = Evex4Cases[c].p2; + printf("0x62, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, ", evex_p0, 
evex_p1, evex_p2, i, modrm); + generatePostamble(6); } } } diff --git a/src/coreclr/debug/ee/amd64/walker.cpp b/src/coreclr/debug/ee/amd64/walker.cpp index 4eef90d526a2b1..5ed3b9c8e319be 100644 --- a/src/coreclr/debug/ee/amd64/walker.cpp +++ b/src/coreclr/debug/ee/amd64/walker.cpp @@ -23,6 +23,8 @@ // void NativeWalker::Decode() { + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", m_ip)); + const BYTE *ip = m_ip; m_type = WALK_UNKNOWN; @@ -30,13 +32,13 @@ void NativeWalker::Decode() m_nextIP = NULL; BYTE rex = 0; - - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: m_ip 0x%p\n", m_ip)); + BYTE rex2_payload = 0; + bool has_rex2 = false; BYTE prefix = *ip; if (prefix == 0xcc) { - prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); + prefix = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: change `m_ip` to `ip`? LOG((LF_CORDB, LL_INFO100000, "NW:Decode 1st byte was patched, might have been prefix\n")); } @@ -65,8 +67,13 @@ void NativeWalker::Decode() // String REP prefixes case 0xf2: // REPNE/REPNZ case 0xf3: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: prefix:%02x ", prefix)); ip++; + // REVIEW: it looks like a bug that we don't loop here looking for additional + // prefixes (the 'continue' branches to the 'while (0)' which exits the loop). + // Thus, we will only process a single prefix. For example, we won't process + // "66 40", which is an operand size prefix followed by a REX prefix, and is legal. + // REX and REX2 need to be the final prefixes, but even then, looping would be safe. continue; // REX register extension prefixes @@ -86,13 +93,27 @@ void NativeWalker::Decode() case 0x4d: case 0x4e: case 0x4f: - LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%0.2x ", prefix)); + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX prefix:%02x ", prefix)); // make sure to set rex to prefix, not *ip because *ip still represents the // codestream which has a 0xcc in it. 
rex = prefix; ip++; continue; + // REX2 register extension prefix + case 0xd5: + LOG((LF_CORDB, LL_INFO10000, "NW:Decode: REX2 prefix:%02x ", prefix)); + has_rex2 = true; + ip++; + rex2_payload = *ip; // Get the REX2 payload byte + if (rex2_payload == 0xcc) + { + rex2_payload = (BYTE)DebuggerController::GetPatchedOpcode(ip); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode 2nd byte was patched, REX2 prefix payload byte\n")); + } + ip++; + continue; + default: break; } @@ -101,18 +122,18 @@ void NativeWalker::Decode() // Read the opcode m_opcode = *ip++; - LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%0.2x\n", ip, m_opcode)); + LOG((LF_CORDB, LL_INFO100000, "NW:Decode: ip 0x%p, m_opcode:%02x\n", ip, m_opcode)); // Don't remove this, when we did the check above for the prefix we didn't modify the codestream // and since m_opcode was just taken directly from the code stream it will be patched if we // didn't have a prefix if (m_opcode == 0xcc) { - m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); - LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%0.2x\n", m_opcode)); + m_opcode = (BYTE)DebuggerController::GetPatchedOpcode(m_ip); // REVIEW: it looks like a bug that we use 'm_ip' instead of 'ip' here. + LOG((LF_CORDB, LL_INFO100000, "NW:Decode after patch look up: m_opcode:%02x\n", m_opcode)); } - // Setup rex bits if needed + // Setup REX bits if needed BYTE rex_b = 0; BYTE rex_x = 0; BYTE rex_r = 0; @@ -124,29 +145,52 @@ void NativeWalker::Decode() rex_r = (rex & 0x4) >> 2; // high bit to modrm reg field } + // Setup REX2 bits if needed + BYTE rex2_b3 = 0; + BYTE rex2_b4 = 0; + BYTE rex2_x3 = 0; + BYTE rex2_x4 = 0; + BYTE rex2_r3 = 0; + BYTE rex2_r4 = 0; + + // We could have a REX2 prefix with a zero payload byte, but that would leave these fields all zero, which is correct. 
+ if (rex2_payload != 0) + { + rex2_b3 = rex2_payload & 0x1; + rex2_x3 = (rex2_payload >> 1) & 0x1; + rex2_r3 = (rex2_payload >> 2) & 0x1; + rex2_b4 = (rex2_payload >> 4) & 0x1; + rex2_x4 = (rex2_payload >> 5) & 0x1; + rex2_r4 = (rex2_payload >> 6) & 0x1; + } + // Analyze what we can of the opcode switch (m_opcode) { + // Look for CALL, JMP with opcode 0xFF, modrm.reg=2,3,4,5 case 0xff: { BYTE modrm = *ip++; - // Ignore "inc dword ptr [reg]" instructions - if (modrm == 0) - break; - BYTE mod = (modrm & 0xC0) >> 6; BYTE reg = (modrm & 0x38) >> 3; BYTE rm = (modrm & 0x07); - reg |= (rex_r << 3); - rm |= (rex_b << 3); - - if ((reg < 2) || (reg > 5 && reg < 8) || (reg > 15)) { - // not a valid register for a CALL or BRANCH + if ((reg < 2) || (reg > 5)) { + // Not a CALL/JMP instruction (modrm.reg field is an opcode extension for opcode FF) return; } + BYTE rm_reg = rm; + if (rex != 0) + { + rm_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + rm_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } + BYTE *result; WORD displace; @@ -158,12 +202,12 @@ void NativeWalker::Decode() case 0: case 1: case 2: - if ((rm & 0x07) == 4) // we have an SIB byte following + if (rm == 4) // we have an SIB byte following { // // Get values from the SIB byte // - BYTE sib = *ip; + BYTE sib = *ip; _ASSERT(sib != 0); @@ -171,21 +215,31 @@ void NativeWalker::Decode() BYTE index = (sib & 0x38) >> 3; BYTE base = (sib & 0x07); - index |= (rex_x << 3); - base |= (rex_b << 3); + BYTE index_reg = index; + BYTE base_reg = base; + if (rex != 0) + { + index_reg |= (rex_x << 3); + base_reg |= (rex_b << 3); + } + else if (rex2_payload != 0) + { + index_reg |= (rex2_x3 << 3) | (rex2_x4 << 4); + base_reg |= (rex2_b3 << 3) | (rex2_b4 << 4); + } ip++; // // Get starting value // - if ((mod == 0) && ((base & 0x07) == 5)) + if ((mod == 0) && (base == 5)) { result = 0; } else { - result = (BYTE *)(size_t)GetRegisterValue(base); + result = (BYTE *)(size_t)GetRegisterValue(base_reg); } // @@ -193,7 +247,7 @@ 
void NativeWalker::Decode() // if (index != 0x4) { - result = result + (GetRegisterValue(index) << ss); + result = result + (GetRegisterValue(index_reg) << ss); } // @@ -201,7 +255,7 @@ void NativeWalker::Decode() // if (mod == 0) { - if ((base & 0x07) == 5) + if (base == 5) { result = result + *((INT32*)ip); displace = 7; @@ -221,7 +275,6 @@ void NativeWalker::Decode() result = result + *((INT32*)ip); displace = 7; } - } else { @@ -230,28 +283,32 @@ void NativeWalker::Decode() // // Check for RIP-relative addressing mode. - if ((mod == 0) && ((rm & 0x07) == 5)) + if ((mod == 0) && (rm == 5)) { + // [RIP + disp32] displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) result = const_cast<BYTE *>(m_ip) + displace + *(reinterpret_cast<const INT32*>(ip)); } else { - result = (BYTE *)GetRegisterValue(rm); + result = (BYTE *)GetRegisterValue(rm_reg); if (mod == 0) { - displace = 2; + // [modrm.rm] + displace = 2; // 1 byte opcode + 1 byte modrm } else if (mod == 1) { + // [modrm.rm + disp8] result = result + *((INT8*)ip); - displace = 3; + displace = 3; // 1 byte opcode + 1 byte modrm + 1 byte displacement } else // mod == 2 { + // [modrm.rm + disp32] result = result + *((INT32*)ip); - displace = 6; + displace = 6; // 1 byte opcode + 1 byte modrm + 4 byte displacement (signed) } } } @@ -266,9 +323,9 @@ void NativeWalker::Decode() case 3: default: // The operand is stored in a register. - result = (BYTE *)GetRegisterValue(rm); - displace = 2; - + // [modrm.rm] + result = (BYTE *)GetRegisterValue(rm_reg); + displace = 2; // 1 byte opcode + 1 byte modrm break; } @@ -280,6 +337,11 @@ void NativeWalker::Decode() displace++; } + if (has_rex2) // Can't just check `rex2_payload` since that payload byte might be zero.
+ { + displace += 2; // adjust for the size of the REX2 prefix + } + // because we already checked register validity for CALL/BRANCH // instructions above we can assume that there is no other option if ((reg == 4) || (reg == 5)) @@ -344,52 +406,71 @@ UINT64 NativeWalker::GetRegisterValue(int registerNumber) { case 0: return m_registers->pCurrentContext->Rax; - break; case 1: return m_registers->pCurrentContext->Rcx; - break; case 2: return m_registers->pCurrentContext->Rdx; - break; case 3: return m_registers->pCurrentContext->Rbx; - break; case 4: return m_registers->pCurrentContext->Rsp; - break; case 5: return m_registers->pCurrentContext->Rbp; - break; case 6: return m_registers->pCurrentContext->Rsi; - break; case 7: return m_registers->pCurrentContext->Rdi; - break; case 8: return m_registers->pCurrentContext->R8; - break; case 9: return m_registers->pCurrentContext->R9; - break; case 10: return m_registers->pCurrentContext->R10; - break; case 11: return m_registers->pCurrentContext->R11; - break; case 12: return m_registers->pCurrentContext->R12; - break; case 13: return m_registers->pCurrentContext->R13; - break; case 14: return m_registers->pCurrentContext->R14; - break; case 15: return m_registers->pCurrentContext->R15; - break; +#if 0 + // TODO-XArch-APX: The Windows SDK doesn't define the APX eGPR registers yet. 
+ case 16: + return m_registers->pCurrentContext->R16; + case 17: + return m_registers->pCurrentContext->R17; + case 18: + return m_registers->pCurrentContext->R18; + case 19: + return m_registers->pCurrentContext->R19; + case 20: + return m_registers->pCurrentContext->R20; + case 21: + return m_registers->pCurrentContext->R21; + case 22: + return m_registers->pCurrentContext->R22; + case 23: + return m_registers->pCurrentContext->R23; + case 24: + return m_registers->pCurrentContext->R24; + case 25: + return m_registers->pCurrentContext->R25; + case 26: + return m_registers->pCurrentContext->R26; + case 27: + return m_registers->pCurrentContext->R27; + case 28: + return m_registers->pCurrentContext->R28; + case 29: + return m_registers->pCurrentContext->R29; + case 30: + return m_registers->pCurrentContext->R30; + case 31: + return m_registers->pCurrentContext->R31; +#endif default: _ASSERTE(!"Invalid register number!"); } @@ -431,8 +512,6 @@ static bool InstructionHasModRMByte(Amd64InstrDecode::InstrForm form, bool W) modrm = false; break; default: - if (form & Amd64InstrDecode::InstrForm::Extension) - modrm = true; break; } return modrm; @@ -446,15 +525,15 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) // M1st cases (memory operand comes first) case Amd64InstrDecode::InstrForm::M1st_I1B_L_M16B_or_M8B: case Amd64InstrDecode::InstrForm::M1st_I1B_LL_M8B_M16B_M32B: + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_L_M32B_or_M16B: case Amd64InstrDecode::InstrForm::M1st_LL_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_LL_M2B_M4B_M8B: case Amd64InstrDecode::InstrForm::M1st_LL_M4B_M8B_M16B: case Amd64InstrDecode::InstrForm::M1st_LL_M8B_M16B_M32B: - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: -
case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::M1st_M16B: case Amd64InstrDecode::InstrForm::M1st_M16B_I1B: case Amd64InstrDecode::InstrForm::M1st_M1B: @@ -469,6 +548,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::M1st_W_M4B_or_M1B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::M1st_WP_M8B_or_M4B_or_M2B: @@ -482,6 +562,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M16B_or_M8B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: isWrite = true; @@ -495,7 +576,7 @@ static bool InstructionIsWrite(Amd64InstrDecode::InstrForm form) static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool L, bool evex_b, int LL, bool fPrefix66) { uint8_t opSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { // M32B @@ -545,6 +626,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // W_M8B_or_M4B case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M4B: case Amd64InstrDecode::InstrForm::MOp_I1B_W_M8B_or_M4B: @@ -553,7 +635,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // WP_M8B_or_M8B_or_M2B case 
Amd64InstrDecode::InstrForm::MOnly_WP_M8B_or_M8B_or_M2B: - opSize = W ? 8 : P ? 8 : 2; + opSize = W ? 8 : P ? 2 : 8; break; // WP_M8B_or_M4B_or_M2B case Amd64InstrDecode::InstrForm::M1st_I1B_WP_M8B_or_M4B_or_M2B: @@ -563,11 +645,14 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, case Amd64InstrDecode::InstrForm::MOp_I1B_WP_M8B_or_M4B_or_M2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_or_M4B_or_M2B: - opSize = W ? 8 : P ? 4 : 2; + opSize = W ? 8 : P ? 2 : 4; break; // W_M8B_or_M2B + case Amd64InstrDecode::InstrForm::M1st_I1B_W_M8B_or_M2B: case Amd64InstrDecode::InstrForm::M1st_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_W_M8B_or_M2B: + case Amd64InstrDecode::InstrForm::MOnly_W_M8B_or_M2B: opSize = W ? 8 : 2; break; // M8B @@ -581,7 +666,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // P_M6B_or_M4B case Amd64InstrDecode::InstrForm::MOnly_P_M6B_or_M4B: - opSize = P ? 6 : 4; + opSize = P ? 
4 : 6; break; // M4B case Amd64InstrDecode::InstrForm::M1st_M4B: @@ -660,7 +745,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M4B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M4B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M4B_M16B_M32B_M64B: if (evex_b) @@ -674,7 +758,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, break; // bLL_M8B_M16B_M32B_M64B - case Amd64InstrDecode::InstrForm::M1st_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_I1B_bLL_M8B_M16B_M32B_M64B: case Amd64InstrDecode::InstrForm::MOp_bLL_M8B_M16B_M32B_M64B: if (evex_b) @@ -779,7 +862,6 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, } break; - // MUnknown case Amd64InstrDecode::InstrForm::M1st_MUnknown: case Amd64InstrDecode::InstrForm::MOnly_MUnknown: @@ -796,7 +878,7 @@ static uint8_t InstructionOperandSize(Amd64InstrDecode::InstrForm form, int pp, static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, bool fPrefix66) { int immSize = 0; - bool P = !((pp == 1) || fPrefix66); + const bool P = ((pp == 1) || fPrefix66); switch (form) { case Amd64InstrDecode::InstrForm::I1B: @@ -833,6 +915,7 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, immSize = 3; break; case Amd64InstrDecode::InstrForm::I4B: + case Amd64InstrDecode::InstrForm::M1st_I4B_W_M8B_or_M4B: immSize = 4; break; case Amd64InstrDecode::InstrForm::I8B: @@ -841,10 +924,13 @@ static int InstructionImmSize(Amd64InstrDecode::InstrForm form, int pp, bool W, case Amd64InstrDecode::InstrForm::M1st_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::MOp_WP_M8B_I4B_or_M4B_I4B_or_M2B_I2B: case Amd64InstrDecode::InstrForm::WP_I4B_or_I4B_or_I2B: - immSize = W ? 4 : P ? 4 : 2; + immSize = W ? 4 : P ? 
2 : 4; break; case Amd64InstrDecode::InstrForm::WP_I8B_or_I4B_or_I2B: - immSize = W ? 8 : P ? 4 : 2; + immSize = W ? 8 : P ? 2 : 4; + break; + case Amd64InstrDecode::InstrForm::M1st_W_M8B_I4B_or_M2B_I2B: + immSize = W ? 4 : 2; break; default: @@ -966,12 +1052,14 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio VexMapC40F3A = 0xc403, EvexMap0F = 0x6201, EvexMap0F38 = 0x6202, - EvexMap0F3A = 0x6203 + EvexMap0F3A = 0x6203, + EvexMap4 = 0x6204 } opCodeMap; switch (*address) { case 0xf: + { switch (address[1]) { case 0x38: @@ -999,6 +1087,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio else if (fPrefixF3) pp = 0x2; break; + } case 0xc4: // Vex 3-byte { @@ -1052,7 +1141,10 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case 0x62: // Evex { - BYTE evex_mmm = address[1] & 0x7; + BYTE evex_p0 = address[1]; + BYTE evex_p1 = address[2]; + BYTE evex_p2 = address[3]; + BYTE evex_mmm = evex_p0 & 0x7; switch (evex_mmm) { case 0x1: @@ -1067,29 +1159,64 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio LOG((LF_CORDB, LL_INFO10000, "map:Evex0F3A ")); opCodeMap = EvexMap0F3A; break; + case 0x4: + LOG((LF_CORDB, LL_INFO10000, "map:Evex4 ")); + opCodeMap = EvexMap4; + break; default: _ASSERT(!"Unknown Evex 'mmm' bytes"); return; } - BYTE evex_w = address[2] & 0x80; + BYTE evex_w = evex_p1 & 0x80; if (evex_w != 0) { W = true; } - if ((address[2] & 0x10) != 0) + if (evex_mmm != 4) { - evex_b = true; - } + if ((evex_p2 & 0x10) != 0) + { + evex_b = true; + } - evex_LL = (address[2] >> 5) & 0x3; + evex_LL = (evex_p2 >> 5) & 0x3; + } - pp = address[1] & 0x3; + pp = evex_p1 & 0x3; address += 4; break; } + case 0xD5: // REX2 + { + BYTE rex2_byte1 = address[1]; + address += 2; + + BYTE rex2_w = rex2_byte1 & 0x08; + if (rex2_w != 0) + { + W = true; + } + + if (fPrefix66) + { + pp = 0x1; + } + + BYTE rex2_m0 = rex2_byte1 & 0x80; + if (rex2_m0 == 0) + { + 
opCodeMap = Primary; + } + else + { + opCodeMap = Secondary; + } + break; + } + default: opCodeMap = Primary; break; @@ -1105,7 +1232,7 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio switch (opCodeMap) { case Primary: - form = Amd64InstrDecode::instrFormPrimary[opCode]; + form = Amd64InstrDecode::instrFormPrimary[opCode]; // NOTE: instrFormPrimary is the only map that uses 'opCode', not 'opCodeExt'. break; case Secondary: form = Amd64InstrDecode::instrFormSecondary[opCodeExt]; @@ -1134,6 +1261,9 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio case EvexMap0F3A: form = Amd64InstrDecode::instrFormEvex_0F3A[opCodeExt]; break; + case EvexMap4: + form = Amd64InstrDecode::instrFormEvex_4[opCodeExt]; + break; default: _ASSERTE(false); } @@ -1227,4 +1357,3 @@ void NativeWalker::DecodeInstructionForPatchSkip(const BYTE *address, Instructio } #endif // TARGET_AMD64 -