Skip to content

Commit 5e4ef66

Browse files
committed
Implemented scalar F16C instructions
1 parent fe230d7 commit 5e4ef66

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

Windows/whisper/whisper.vcxproj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
<ClCompile>
7575
<WarningLevel>Level3</WarningLevel>
7676
<SDLCheck>true</SDLCheck>
77-
<PreprocessorDefinitions>WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
77+
<PreprocessorDefinitions>__F16C__;WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
7878
<ConformanceMode>true</ConformanceMode>
7979
<LanguageStandard>stdcpp20</LanguageStandard>
8080
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
@@ -91,7 +91,7 @@
9191
<FunctionLevelLinking>true</FunctionLevelLinking>
9292
<IntrinsicFunctions>true</IntrinsicFunctions>
9393
<SDLCheck>true</SDLCheck>
94-
<PreprocessorDefinitions>WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
94+
<PreprocessorDefinitions>__F16C__;WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
9595
<ConformanceMode>true</ConformanceMode>
9696
<LanguageStandard>stdcpp20</LanguageStandard>
9797
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
@@ -108,7 +108,7 @@
108108
<ClCompile>
109109
<WarningLevel>Level3</WarningLevel>
110110
<SDLCheck>true</SDLCheck>
111-
<PreprocessorDefinitions>WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;_DEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
111+
<PreprocessorDefinitions>__F16C__;WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;_DEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
112112
<ConformanceMode>true</ConformanceMode>
113113
<LanguageStandard>stdcpp20</LanguageStandard>
114114
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
@@ -125,7 +125,7 @@
125125
<FunctionLevelLinking>true</FunctionLevelLinking>
126126
<IntrinsicFunctions>true</IntrinsicFunctions>
127127
<SDLCheck>true</SDLCheck>
128-
<PreprocessorDefinitions>WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;NDEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
128+
<PreprocessorDefinitions>__F16C__;WHISPER_SHARED;WHISPER_BUILD;_CRT_SECURE_NO_WARNINGS;NDEBUG;WHISPER_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
129129
<ConformanceMode>true</ConformanceMode>
130130
<LanguageStandard>stdcpp20</LanguageStandard>
131131
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>

ggml.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,15 +139,28 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
139139
// ref: https://github.com/Maratyszcza/FP16
140140

141141
#ifdef __F16C__
142-
float ggml_fp16_to_fp32(ggml_fp16_t h) {
143-
return _cvtsh_ss(h);
142+
143+
// Converts a single half-precision value to single precision.
// The conversion is done with the packed F16C intrinsic _mm_cvtph_ps; only the
// lowest lane of its result is returned.
//   h: the half-precision value to convert (checked below to be 2 bytes wide).
//   returns: lane 0 of the converted vector as a float.
float ggml_fp16_to_fp32( ggml_fp16_t h )
144+
{
145+
// Compile-time guard: the code below treats ggml_fp16_t as a 16-bit payload.
static_assert( sizeof( ggml_fp16_t ) == 2, "sizeof" );
146+
147+
// Put h into the lowest 32 bits of a 128-bit integer vector; the intrinsic
// zeroes the remaining bits.
// NOTE(review): h is implicitly converted to int here, which assumes
// ggml_fp16_t is an integer type - confirm against its typedef.
__m128i iv = _mm_cvtsi32_si128( h );
148+
// Converts the four half values held in the low 64 bits to four floats.
// Only lane 0 carries the input; the other lanes come from the padding above.
__m128 fv = _mm_cvtph_ps( iv );
149+
// Extract lane 0 as the scalar result.
return _mm_cvtss_f32( fv );
144150
}
145-
ggml_fp16_t ggml_fp32_to_fp16(float f) {
146-
return _cvtss_sh(f, 0);
151+
152+
// Converts a single-precision value to half precision.
// The conversion is done with the packed F16C intrinsic _mm_cvtps_ph; only the
// lowest 16 bits of its result are returned.
//   f: the float to convert.
//   returns: the low 16 bits of the converted vector, cast to ggml_fp16_t.
ggml_fp16_t ggml_fp32_to_fp16( float f )
153+
{
154+
// Compile-time guard: the result is narrowed to 16 bits before being returned.
static_assert( sizeof( ggml_fp16_t ) == 2, "sizeof" );
155+
156+
// Place f in lane 0 of a float vector; _mm_set_ss zeroes the upper three lanes.
__m128 fv = _mm_set_ss( f );
157+
// Convert the four floats to four half values stored in the low 64 bits.
// The immediate 0 is the rounding control; per Intel's documentation for
// VCVTPS2PH this value selects round-to-nearest-even.
__m128i iv = _mm_cvtps_ph( fv, 0 );
158+
// Read back the lowest 32 bits of the vector (half lanes 0 and 1).
uint32_t i32 = (uint32_t)_mm_cvtsi128_si32( iv );
159+
// Keep only the low 16 bits, i.e. the half value produced from f.
return (ggml_fp16_t)(uint16_t)i32;
147160
}
148161

149-
#define GGML_FP16_TO_FP32(x) _cvtsh_ss(x)
150-
#define GGML_FP32_TO_FP16(x) _cvtss_sh(x, 0)
162+
// In the __F16C__ branch the conversion macros expand to calls of the
// ggml_fp16_to_fp32 / ggml_fp32_to_fp16 functions.
// NOTE(review): whether these calls are inlined at each use site depends on the
// compiler and build settings, which are not visible here - confirm if the
// macros are used in hot loops.
#define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
163+
#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
151164

152165
#else
153166

0 commit comments

Comments
 (0)