Skip to content

Commit 0b9e4dc

Browse files
Merge branch 'master' into sp/image-wrap-ptr
2 parents 0782f14 + f84d525 commit 0b9e4dc

File tree

11 files changed

+579
-394
lines changed

11 files changed

+579
-394
lines changed

src/ImageSharp/ColorSpaces/Companding/SRgbCompanding.cs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
@@ -25,12 +25,14 @@ public static class SRgbCompanding
2525
[MethodImpl(InliningOptions.ShortMethod)]
2626
public static void Expand(Span<Vector4> vectors)
2727
{
28-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
28+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
29+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
2930

30-
for (int i = 0; i < vectors.Length; i++)
31+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
3132
{
32-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
33-
Expand(ref v);
33+
Expand(ref vectorsStart);
34+
35+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
3436
}
3537
}
3638

@@ -41,12 +43,14 @@ public static void Expand(Span<Vector4> vectors)
4143
[MethodImpl(InliningOptions.ShortMethod)]
4244
public static void Compress(Span<Vector4> vectors)
4345
{
44-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
46+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
47+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
4548

46-
for (int i = 0; i < vectors.Length; i++)
49+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
4750
{
48-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
49-
Compress(ref v);
51+
Compress(ref vectorsStart);
52+
53+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
5054
}
5155
}
5256

@@ -90,4 +94,4 @@ public static void Compress(ref Vector4 vector)
9094
[MethodImpl(InliningOptions.ShortMethod)]
9195
public static float Compress(float channel) => channel <= 0.0031308F ? 12.92F * channel : (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
9296
}
93-
}
97+
}

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 113 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,11 @@ public static int GreatestCommonDivisor(int a, int b)
4141

4242
/// <summary>
4343
/// Determine the Least Common Multiple (LCM) of two numbers.
44+
/// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
4445
/// </summary>
4546
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4647
public static int LeastCommonMultiple(int a, int b)
47-
{
48-
// https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor
49-
return (a / GreatestCommonDivisor(a, b)) * b;
50-
}
48+
=> a / GreatestCommonDivisor(a, b) * b;
5149

5250
/// <summary>
5351
/// Calculates <paramref name="x"/> % 2
@@ -290,10 +288,14 @@ public static void Clamp(Span<byte> span, byte min, byte max)
290288

291289
if (remainder.Length > 0)
292290
{
293-
for (int i = 0; i < remainder.Length; i++)
291+
ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
292+
ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
293+
294+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
294295
{
295-
ref byte v = ref remainder[i];
296-
v = Clamp(v, min, max);
296+
remainderStart = Clamp(remainderStart, min, max);
297+
298+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
297299
}
298300
}
299301
}
@@ -311,10 +313,14 @@ public static void Clamp(Span<uint> span, uint min, uint max)
311313

312314
if (remainder.Length > 0)
313315
{
314-
for (int i = 0; i < remainder.Length; i++)
316+
ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
317+
ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
318+
319+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
315320
{
316-
ref uint v = ref remainder[i];
317-
v = Clamp(v, min, max);
321+
remainderStart = Clamp(remainderStart, min, max);
322+
323+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
318324
}
319325
}
320326
}
@@ -332,10 +338,14 @@ public static void Clamp(Span<int> span, int min, int max)
332338

333339
if (remainder.Length > 0)
334340
{
335-
for (int i = 0; i < remainder.Length; i++)
341+
ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
342+
ref int remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
343+
344+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
336345
{
337-
ref int v = ref remainder[i];
338-
v = Clamp(v, min, max);
346+
remainderStart = Clamp(remainderStart, min, max);
347+
348+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
339349
}
340350
}
341351
}
@@ -353,10 +363,14 @@ public static void Clamp(Span<float> span, float min, float max)
353363

354364
if (remainder.Length > 0)
355365
{
356-
for (int i = 0; i < remainder.Length; i++)
366+
ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
367+
ref float remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
368+
369+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
357370
{
358-
ref float v = ref remainder[i];
359-
v = Clamp(v, min, max);
371+
remainderStart = Clamp(remainderStart, min, max);
372+
373+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
360374
}
361375
}
362376
}
@@ -374,10 +388,14 @@ public static void Clamp(Span<double> span, double min, double max)
374388

375389
if (remainder.Length > 0)
376390
{
377-
for (int i = 0; i < remainder.Length; i++)
391+
ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
392+
ref double remainderEnd = ref Unsafe.Add(ref remainderStart, remainder.Length);
393+
394+
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
378395
{
379-
ref double v = ref remainder[i];
380-
v = Clamp(v, min, max);
396+
remainderStart = Clamp(remainderStart, min, max);
397+
398+
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
381399
}
382400
}
383401
}
@@ -407,33 +425,42 @@ private static void ClampImpl<T>(Span<T> span, T min, T max)
407425
where T : unmanaged
408426
{
409427
ref T sRef = ref MemoryMarshal.GetReference(span);
410-
ref Vector<T> vsBase = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
411428
var vmin = new Vector<T>(min);
412429
var vmax = new Vector<T>(max);
413430

414431
int n = span.Length / Vector<T>.Count;
415432
int m = Modulo4(n);
416433
int u = n - m;
417434

418-
for (int i = 0; i < u; i += 4)
419-
{
420-
ref Vector<T> vs0 = ref Unsafe.Add(ref vsBase, i);
421-
ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
422-
ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
423-
ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
435+
ref Vector<T> vs0 = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
436+
ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
437+
ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
438+
ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
439+
ref Vector<T> vsEnd = ref Unsafe.Add(ref vs0, u);
424440

441+
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
442+
{
425443
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
426444
vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax);
427445
vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax);
428446
vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax);
447+
448+
vs0 = ref Unsafe.Add(ref vs0, 4);
449+
vs1 = ref Unsafe.Add(ref vs1, 4);
450+
vs2 = ref Unsafe.Add(ref vs2, 4);
451+
vs3 = ref Unsafe.Add(ref vs3, 4);
429452
}
430453

431454
if (m > 0)
432455
{
433-
for (int i = u; i < n; i++)
456+
vs0 = ref vsEnd;
457+
vsEnd = ref Unsafe.Add(ref vsEnd, m);
458+
459+
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
434460
{
435-
ref Vector<T> vs0 = ref Unsafe.Add(ref vsBase, i);
436461
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
462+
463+
vs0 = ref Unsafe.Add(ref vs0, 1);
437464
}
438465
}
439466
}
@@ -472,10 +499,8 @@ public static void Premultiply(Span<Vector4> vectors)
472499
#if SUPPORTS_RUNTIME_INTRINSICS
473500
if (Avx2.IsSupported && vectors.Length >= 2)
474501
{
475-
ref Vector256<float> vectorsBase =
476-
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
477-
478502
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
503+
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
479504
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
480505

481506
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -495,12 +520,14 @@ public static void Premultiply(Span<Vector4> vectors)
495520
else
496521
#endif
497522
{
498-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
523+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
524+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
499525

500-
for (int i = 0; i < vectors.Length; i++)
526+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
501527
{
502-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
503-
Premultiply(ref v);
528+
Premultiply(ref vectorsStart);
529+
530+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
504531
}
505532
}
506533
}
@@ -515,10 +542,8 @@ public static void UnPremultiply(Span<Vector4> vectors)
515542
#if SUPPORTS_RUNTIME_INTRINSICS
516543
if (Avx2.IsSupported && vectors.Length >= 2)
517544
{
518-
ref Vector256<float> vectorsBase =
519-
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
520-
521545
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
546+
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
522547
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
523548

524549
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
@@ -538,12 +563,14 @@ public static void UnPremultiply(Span<Vector4> vectors)
538563
else
539564
#endif
540565
{
541-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
566+
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
567+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, vectors.Length);
542568

543-
for (int i = 0; i < vectors.Length; i++)
569+
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
544570
{
545-
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
546-
UnPremultiply(ref v);
571+
UnPremultiply(ref vectorsStart);
572+
573+
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
547574
}
548575
}
549576
}
@@ -633,53 +660,54 @@ public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
633660
vectors128Ref = y4;
634661
vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
635662
}
636-
637-
return;
638663
}
664+
else
639665
#endif
640-
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
641-
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
642-
643-
// Fallback with scalar preprocessing and vectorized approximation steps
644-
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
645666
{
646-
Vector4 v = vectorsRef;
667+
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
668+
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, vectors.Length);
647669

648-
double
649-
x64 = v.X,
650-
y64 = v.Y,
651-
z64 = v.Z;
652-
float a = v.W;
653-
654-
ulong
655-
xl = *(ulong*)&x64,
656-
yl = *(ulong*)&y64,
657-
zl = *(ulong*)&z64;
658-
659-
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
660-
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
661-
// this means what we actually want is to find the cube root of our clamped values.
662-
// For more info on the constant below, see:
663-
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
664-
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
665-
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
666-
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
667-
xl = 0x2a9f8a7be393b600 + (xl / 3);
668-
yl = 0x2a9f8a7be393b600 + (yl / 3);
669-
zl = 0x2a9f8a7be393b600 + (zl / 3);
670-
671-
Vector4 y4;
672-
y4.X = (float)*(double*)&xl;
673-
y4.Y = (float)*(double*)&yl;
674-
y4.Z = (float)*(double*)&zl;
675-
y4.W = 0;
676-
677-
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
678-
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
679-
y4.W = a;
680-
681-
vectorsRef = y4;
682-
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
670+
// Fallback with scalar preprocessing and vectorized approximation steps
671+
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
672+
{
673+
Vector4 v = vectorsRef;
674+
675+
double
676+
x64 = v.X,
677+
y64 = v.Y,
678+
z64 = v.Z;
679+
float a = v.W;
680+
681+
ulong
682+
xl = *(ulong*)&x64,
683+
yl = *(ulong*)&y64,
684+
zl = *(ulong*)&z64;
685+
686+
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
687+
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamma is 3 in this case,
688+
// this means what we actually want is to find the cube root of our clamped values.
689+
// For more info on the constant below, see:
690+
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
691+
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in parallel, and
692+
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
693+
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
694+
xl = 0x2a9f8a7be393b600 + (xl / 3);
695+
yl = 0x2a9f8a7be393b600 + (yl / 3);
696+
zl = 0x2a9f8a7be393b600 + (zl / 3);
697+
698+
Vector4 y4;
699+
y4.X = (float)*(double*)&xl;
700+
y4.Y = (float)*(double*)&yl;
701+
y4.Z = (float)*(double*)&zl;
702+
y4.W = 0;
703+
704+
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
705+
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
706+
y4.W = a;
707+
708+
vectorsRef = y4;
709+
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
710+
}
683711
}
684712
}
685713
}

0 commit comments

Comments
 (0)