Skip to content

Commit 5c7f4a9

Browse files
committed
Added more specialized Png filter code
Modified tests accordingly
1 parent 9d04ec8 commit 5c7f4a9

File tree

6 files changed

+145
-17
lines changed

6 files changed

+145
-17
lines changed

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,7 @@ public static Vector256<float> Lerp(
749749
public static float Lerp(float value1, float value2, float amount)
750750
=> ((value2 - value1) * amount) + value1;
751751

752+
#if SUPPORTS_RUNTIME_INTRINSICS
752753
[MethodImpl(MethodImplOptions.AggressiveInlining)]
753754
public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
754755
{
@@ -762,5 +763,50 @@ public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
762763
accumulator += intLow;
763764
accumulator += intHigh;
764765
}
766+
767+
/// <summary>
/// Computes the horizontal sum of every 32-bit lane of the given vector.
/// </summary>
/// <param name="accumulator">The vector whose lanes are added together.</param>
/// <returns>The total of all lanes in <paramref name="accumulator"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int ReduceSum(Vector128<int> accumulator)
{
    if (!Ssse3.IsSupported)
    {
        // Software path: walk the lanes one at a time.
        int total = 0;
        int lane = 0;
        while (lane < Vector128<int>.Count)
        {
            total += accumulator.GetElement(lane);
            lane++;
        }

        return total;
    }

    // pairs = [a0 + a1, a2 + a3, a0 + a1, a2 + a3]
    Vector128<int> pairs = Ssse3.HorizontalAdd(accumulator, accumulator);

    // Move lane 1 (a2 + a3) into lane 0 and add, so lane 0 ends up holding the full sum.
    Vector128<int> folded = Sse2.Add(pairs, Sse2.Shuffle(pairs, 0x1));

    // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
    return Sse2.ConvertToInt32(folded);
}
795+
796+
/// <summary>
/// Reduces even elements of the vector into one sum.
/// </summary>
/// <remarks>
/// Only lanes 0, 2, 4 and 6 contribute to the result; odd lanes are ignored.
/// The PNG filter encoders call this on accumulators built from
/// <c>Avx2.SumAbsoluteDifferences(...).AsInt32()</c>.
/// A scalar fallback is used when SSE2 is unavailable, so the helper never throws
/// <see cref="System.PlatformNotSupportedException"/>.
/// </remarks>
/// <param name="accumulator">The accumulator to reduce.</param>
/// <returns>The sum of even elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EvenReduceSum(Vector256<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add upper 128-bit half to lower half: lane i now holds a[i] + a[i + 4].
        Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper());

        // Shuffle selects lanes [2, 3, 2, 3]; after the add, lane 0 holds a0 + a4 + a2 + a6.
        vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));

        // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
        return Sse2.ConvertToInt32(vsum);
    }
    else
    {
        // Software fallback mirroring ReduceSum: visit only the even lanes.
        int sum = 0;
        for (int i = 0; i < Vector256<int>.Count; i += 2)
        {
            sum += accumulator.GetElement(i);
        }

        return sum;
    }
}
810+
#endif
765811
}
766812
}

src/ImageSharp/Formats/Png/Filters/AverageFilter.cs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,7 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
106106
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
107107
}
108108

109-
for (int i = 0; i < Vector256<int>.Count; i++)
110-
{
111-
sum += sumAccumulator.GetElement(i);
112-
}
109+
sum += Numerics.EvenReduceSum(sumAccumulator);
113110
}
114111
else if (Sse2.IsSupported)
115112
{
@@ -156,10 +153,7 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
156153
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
157154
}
158155

159-
for (int i = 0; i < Vector128<int>.Count; i++)
160-
{
161-
sum += sumAccumulator.GetElement(i);
162-
}
156+
sum += Numerics.ReduceSum(sumAccumulator);
163157
}
164158
#endif
165159

src/ImageSharp/Formats/Png/Filters/PaethFilter.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,7 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
108108
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
109109
}
110110

111-
for (int i = 0; i < Vector256<int>.Count; i++)
112-
{
113-
sum += sumAccumulator.GetElement(i);
114-
}
111+
sum += Numerics.EvenReduceSum(sumAccumulator);
115112
}
116113
else if (Vector.IsHardwareAccelerated)
117114
{

src/ImageSharp/Formats/Png/Filters/SubFilter.cs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88

9+
#if SUPPORTS_RUNTIME_INTRINSICS
10+
using System.Runtime.Intrinsics;
11+
using System.Runtime.Intrinsics.X86;
12+
#endif
13+
914
namespace SixLabors.ImageSharp.Formats.Png.Filters
1015
{
1116
/// <summary>
@@ -66,7 +71,26 @@ public static void Encode(Span<byte> scanline, Span<byte> result, int bytesPerPi
6671
}
6772

6873
#if SUPPORTS_RUNTIME_INTRINSICS
69-
if (Vector.IsHardwareAccelerated)
74+
if (Avx2.IsSupported)
75+
{
76+
Vector256<byte> zero = Vector256<byte>.Zero;
77+
Vector256<int> sumAccumulator = Vector256<int>.Zero;
78+
79+
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
80+
{
81+
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
82+
Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
83+
84+
Vector256<byte> res = Avx2.Subtract(scan, prev);
85+
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
86+
x += Vector256<byte>.Count;
87+
88+
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
89+
}
90+
91+
sum += Numerics.EvenReduceSum(sumAccumulator);
92+
}
93+
else if (Vector.IsHardwareAccelerated)
7094
{
7195
Vector<uint> sumAccumulator = Vector<uint>.Zero;
7296

src/ImageSharp/Formats/Png/Filters/UpFilter.cs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88

9+
#if SUPPORTS_RUNTIME_INTRINSICS
10+
using System.Runtime.Intrinsics;
11+
using System.Runtime.Intrinsics.X86;
12+
#endif
13+
914
namespace SixLabors.ImageSharp.Formats.Png.Filters
1015
{
1116
/// <summary>
@@ -61,7 +66,26 @@ public static void Encode(Span<byte> scanline, Span<byte> previousScanline, Span
6166
int x = 0;
6267

6368
#if SUPPORTS_RUNTIME_INTRINSICS
64-
if (Vector.IsHardwareAccelerated)
69+
if (Avx2.IsSupported)
70+
{
71+
Vector256<byte> zero = Vector256<byte>.Zero;
72+
Vector256<int> sumAccumulator = Vector256<int>.Zero;
73+
74+
for (; x + Vector256<byte>.Count <= scanline.Length;)
75+
{
76+
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
77+
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
78+
79+
Vector256<byte> res = Avx2.Subtract(scan, above);
80+
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
81+
x += Vector256<byte>.Count;
82+
83+
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
84+
}
85+
86+
sum += Numerics.EvenReduceSum(sumAccumulator);
87+
}
88+
else if (Vector.IsHardwareAccelerated)
6589
{
6690
Vector<uint> sumAccumulator = Vector<uint>.Zero;
6791

tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static void RunTest()
101101
}
102102

103103
[Fact]
104-
public void PaethSimd()
104+
public void PaethAvx2()
105105
{
106106
static void RunTest()
107107
{
@@ -114,6 +114,20 @@ static void RunTest()
114114
HwIntrinsics.AllowAll);
115115
}
116116

117+
// Runs the Paeth filter test via FeatureTestRunner with the flags
// HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2.
// NOTE(review): presumably this turns AVX2 off so the encoder's
// Vector.IsHardwareAccelerated branch is exercised instead — confirm against
// FeatureTestRunner and PaethFilter.Encode.
[Fact]
118+
public void PaethVector()
119+
{
120+
static void RunTest()
121+
{
122+
var data = new TestData(PngFilterMethod.Paeth, Size);
123+
data.TestFilter();
124+
}
125+
126+
FeatureTestRunner.RunWithHwIntrinsicsFeature(
127+
RunTest,
128+
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
129+
}
130+
117131
[Fact]
118132
public void Up()
119133
{
@@ -128,8 +142,9 @@ static void RunTest()
128142
HwIntrinsics.DisableSIMD);
129143
}
130144

145+
131146
[Fact]
132-
public void UpSimd()
147+
public void UpAvx2()
133148
{
134149
static void RunTest()
135150
{
@@ -142,6 +157,20 @@ static void RunTest()
142157
HwIntrinsics.AllowAll);
143158
}
144159

160+
// Runs the Up filter test via FeatureTestRunner with the flags
// HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2.
// NOTE(review): presumably this turns AVX2 off so the encoder's
// Vector.IsHardwareAccelerated branch is exercised instead — confirm against
// FeatureTestRunner and UpFilter.Encode.
[Fact]
161+
public void UpVector()
162+
{
163+
static void RunTest()
164+
{
165+
var data = new TestData(PngFilterMethod.Up, Size);
166+
data.TestFilter();
167+
}
168+
169+
FeatureTestRunner.RunWithHwIntrinsicsFeature(
170+
RunTest,
171+
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
172+
}
173+
145174
[Fact]
146175
public void Sub()
147176
{
@@ -157,7 +186,7 @@ static void RunTest()
157186
}
158187

159188
[Fact]
160-
public void SubSimd()
189+
public void SubAvx2()
161190
{
162191
static void RunTest()
163192
{
@@ -170,6 +199,20 @@ static void RunTest()
170199
HwIntrinsics.AllowAll);
171200
}
172201

202+
// Runs the Sub filter test via FeatureTestRunner with the flags
// HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2.
// NOTE(review): presumably this turns AVX2 off so the encoder's
// Vector.IsHardwareAccelerated branch is exercised instead — confirm against
// FeatureTestRunner and SubFilter.Encode.
[Fact]
203+
public void SubVector()
204+
{
205+
static void RunTest()
206+
{
207+
var data = new TestData(PngFilterMethod.Sub, Size);
208+
data.TestFilter();
209+
}
210+
211+
FeatureTestRunner.RunWithHwIntrinsicsFeature(
212+
RunTest,
213+
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
214+
}
215+
173216
public class TestData
174217
{
175218
private readonly PngFilterMethod filter;

0 commit comments

Comments
 (0)