<PackageReference Include="BouncyCastle.Cryptography" Version="2.4.0" />

Nat512

static class Nat512
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Org.BouncyCastle.Math.Raw
{
    /// <summary>
    /// Fixed-size helpers for 64-byte operands held as 16 <see cref="uint"/> words or
    /// 8 <see cref="ulong"/> words: multiplication and squaring built on <c>Nat256</c>,
    /// plus element-wise XOR routines with AVX2 / SSE2 / scalar code paths.
    /// </summary>
    internal static class Nat512
    {
        /// <summary>
        /// Combines three <c>Nat256</c> multiplications (the words at offset 0, the words at
        /// offset 8, and the two half-differences) into <paramref name="zz"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the shape matches a Karatsuba-style product of two 16-word values;
        /// confirm against the <c>Nat256</c> / <c>Nat</c> helper semantics.
        /// </remarks>
        /// <param name="x">First operand; read at offsets 0 and 8.</param>
        /// <param name="y">Second operand; read at offsets 0 and 8.</param>
        /// <param name="zz">Destination; presumably 32 words (offsets 0, 8, 16 and 24 are touched).</param>
        public static void Mul(uint[] x, uint[] y, uint[] zz)
        {
            // Two partial products: one written at zz[0..], the other at zz[16..].
            Nat256.Mul(x, y, zz);
            Nat256.Mul(x, 8, y, 8, zz, 16);

            // Fold the partial products into the middle words, tracking the carries.
            uint carry = Nat256.AddToEachOther(zz, 8, zz, 16);
            uint carryIn = carry + Nat256.AddTo(zz, 0, zz, 8, 0);
            carry += Nat256.AddTo(zz, 24, zz, 16, carryIn);

            // Half-differences of each operand; the two Diff results decide add vs. subtract below.
            uint[] xDiff = Nat256.Create();
            uint[] yDiff = Nat256.Create();
            var xDiffResult = Nat256.Diff(x, 8, x, 0, xDiff, 0);
            var yDiffResult = Nat256.Diff(y, 8, y, 0, yDiff, 0);
            bool resultsDiffer = xDiffResult != yDiffResult;

            uint[] cross = Nat256.CreateExt();
            Nat256.Mul(xDiff, yDiff, cross);

            // Signed arithmetic on purpose: SubFrom's result is added as an int.
            int signedCarry = (int)carry;
            int adjustment;
            if (resultsDiffer)
            {
                adjustment = (int)Nat.AddTo(16, cross, 0, zz, 8);
            }
            else
            {
                adjustment = Nat.SubFrom(16, cross, 0, zz, 8);
            }

            carry = (uint)(signedCarry + adjustment);
            Nat.AddWordAt(32, carry, zz, 24);
        }

        /// <summary>
        /// Squaring counterpart of <see cref="Mul"/>: two <c>Nat256</c> squarings of the
        /// words at offsets 0 and 8, then the square of their difference is subtracted
        /// from the middle of <paramref name="zz"/>.
        /// </summary>
        /// <param name="x">Operand; read at offsets 0 and 8.</param>
        /// <param name="zz">Destination; presumably 32 words (offsets 0, 8, 16 and 24 are touched).</param>
        public static void Square(uint[] x, uint[] zz)
        {
            Nat256.Square(x, zz);
            Nat256.Square(x, 8, zz, 16);

            uint carry = Nat256.AddToEachOther(zz, 8, zz, 16);
            uint carryIn = carry + Nat256.AddTo(zz, 0, zz, 8, 0);
            carry += Nat256.AddTo(zz, 24, zz, 16, carryIn);

            // Only the difference itself is needed here; Diff's return value is ignored.
            uint[] halfDiff = Nat256.Create();
            Nat256.Diff(x, 8, x, 0, halfDiff, 0);

            uint[] halfDiffSquared = Nat256.CreateExt();
            Nat256.Square(halfDiff, halfDiffSquared);

            carry = (uint)((int)carry + Nat.SubFrom(16, halfDiffSquared, 0, zz, 8));
            Nat.AddWordAt(32, carry, zz, 24);
        }

        /// <summary>
        /// Array overload of <see cref="Xor(ReadOnlySpan{uint}, ReadOnlySpan{uint}, Span{uint})"/>;
        /// each span starts at the given offset and runs to the end of its array.
        /// </summary>
        public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
        {
            Xor(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff));
        }

        /// <summary>
        /// Writes <c>x[i] ^ y[i]</c> into <c>z[i]</c> for the first 16 elements.
        /// </summary>
        /// <remarks>
        /// Uses two 256-bit XORs when AVX2 is enabled, four 128-bit XORs when SSE2 is enabled
        /// (both also require <c>Vector.IsPacked</c>), and a scalar loop otherwise.
        /// </remarks>
        public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));

                // Every load is issued before the first store.
                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0, 32));
                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(32, 32));
                Vector256<byte> yLo = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0, 32));
                Vector256<byte> yHi = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(32, 32));

                Vector256<byte> zLo = System.Runtime.Intrinsics.X86.Avx2.Xor(xLo, yLo);
                Vector256<byte> zHi = System.Runtime.Intrinsics.X86.Avx2.Xor(xHi, yHi);

                MemoryMarshal.Write(zBytes.Slice(0, 32), ref zLo);
                MemoryMarshal.Write(zBytes.Slice(32, 32), ref zHi);
                return;
            }

            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));

                // Every load is issued before the first store.
                Vector128<byte> x0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0, 16));
                Vector128<byte> x1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(16, 16));
                Vector128<byte> x2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(32, 16));
                Vector128<byte> x3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(48, 16));
                Vector128<byte> y0 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0, 16));
                Vector128<byte> y1 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(16, 16));
                Vector128<byte> y2 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(32, 16));
                Vector128<byte> y3 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(48, 16));

                Vector128<byte> z0 = System.Runtime.Intrinsics.X86.Sse2.Xor(x0, y0);
                Vector128<byte> z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(x1, y1);
                Vector128<byte> z2 = System.Runtime.Intrinsics.X86.Sse2.Xor(x2, y2);
                Vector128<byte> z3 = System.Runtime.Intrinsics.X86.Sse2.Xor(x3, y3);

                MemoryMarshal.Write(zBytes.Slice(0, 16), ref z0);
                MemoryMarshal.Write(zBytes.Slice(16, 16), ref z1);
                MemoryMarshal.Write(zBytes.Slice(32, 16), ref z2);
                MemoryMarshal.Write(zBytes.Slice(48, 16), ref z3);
                return;
            }

            // Scalar fallback, four elements per iteration.
            for (int k = 0; k < 16; k += 4)
            {
                z[k] = x[k] ^ y[k];
                z[k + 1] = x[k + 1] ^ y[k + 1];
                z[k + 2] = x[k + 2] ^ y[k + 2];
                z[k + 3] = x[k + 3] ^ y[k + 3];
            }
        }

        /// <summary>
        /// Array overload of <see cref="XorTo(ReadOnlySpan{uint}, Span{uint})"/>;
        /// each span starts at the given offset and runs to the end of its array.
        /// </summary>
        public static void XorTo(uint[] x, int xOff, uint[] z, int zOff)
        {
            XorTo(x.AsSpan(xOff), z.AsSpan(zOff));
        }

        /// <summary>
        /// XORs the first 16 elements of <paramref name="x"/> into <paramref name="z"/> in place
        /// (<c>z[i] ^= x[i]</c>).
        /// </summary>
        /// <remarks>
        /// Uses two 256-bit XORs when AVX2 is enabled, four 128-bit XORs when SSE2 is enabled
        /// (both also require <c>Vector.IsPacked</c>), and a scalar loop otherwise.
        /// </remarks>
        public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));

                // The current contents of z are loaded before being overwritten.
                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0, 32));
                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(32, 32));
                Vector256<byte> zLo = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0, 32));
                Vector256<byte> zHi = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(32, 32));

                Vector256<byte> outLo = System.Runtime.Intrinsics.X86.Avx2.Xor(xLo, zLo);
                Vector256<byte> outHi = System.Runtime.Intrinsics.X86.Avx2.Xor(xHi, zHi);

                MemoryMarshal.Write(zBytes.Slice(0, 32), ref outLo);
                MemoryMarshal.Write(zBytes.Slice(32, 32), ref outHi);
                return;
            }

            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));

                // The current contents of z are loaded before being overwritten.
                Vector128<byte> x0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0, 16));
                Vector128<byte> x1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(16, 16));
                Vector128<byte> x2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(32, 16));
                Vector128<byte> x3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(48, 16));
                Vector128<byte> z0 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0, 16));
                Vector128<byte> z1 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(16, 16));
                Vector128<byte> z2 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(32, 16));
                Vector128<byte> z3 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(48, 16));

                Vector128<byte> out0 = System.Runtime.Intrinsics.X86.Sse2.Xor(x0, z0);
                Vector128<byte> out1 = System.Runtime.Intrinsics.X86.Sse2.Xor(x1, z1);
                Vector128<byte> out2 = System.Runtime.Intrinsics.X86.Sse2.Xor(x2, z2);
                Vector128<byte> out3 = System.Runtime.Intrinsics.X86.Sse2.Xor(x3, z3);

                MemoryMarshal.Write(zBytes.Slice(0, 16), ref out0);
                MemoryMarshal.Write(zBytes.Slice(16, 16), ref out1);
                MemoryMarshal.Write(zBytes.Slice(32, 16), ref out2);
                MemoryMarshal.Write(zBytes.Slice(48, 16), ref out3);
                return;
            }

            // Scalar fallback, four elements per iteration.
            for (int k = 0; k < 16; k += 4)
            {
                z[k] ^= x[k];
                z[k + 1] ^= x[k + 1];
                z[k + 2] ^= x[k + 2];
                z[k + 3] ^= x[k + 3];
            }
        }

        /// <summary>
        /// Array overload of <see cref="Xor64(ReadOnlySpan{ulong}, ReadOnlySpan{ulong}, Span{ulong})"/>;
        /// each span starts at the given offset and runs to the end of its array.
        /// </summary>
        public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
        {
            Xor64(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff));
        }

        /// <summary>
        /// Writes <c>x[i] ^ y[i]</c> into <c>z[i]</c> for the first 8 elements.
        /// </summary>
        /// <remarks>
        /// Uses two 256-bit XORs when AVX2 is enabled, four 128-bit XORs when SSE2 is enabled
        /// (both also require <c>Vector.IsPacked</c>), and a scalar loop otherwise.
        /// </remarks>
        public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));

                // Every load is issued before the first store.
                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0, 32));
                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(32, 32));
                Vector256<byte> yLo = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0, 32));
                Vector256<byte> yHi = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(32, 32));

                Vector256<byte> zLo = System.Runtime.Intrinsics.X86.Avx2.Xor(xLo, yLo);
                Vector256<byte> zHi = System.Runtime.Intrinsics.X86.Avx2.Xor(xHi, yHi);

                MemoryMarshal.Write(zBytes.Slice(0, 32), ref zLo);
                MemoryMarshal.Write(zBytes.Slice(32, 32), ref zHi);
                return;
            }

            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));

                // Every load is issued before the first store.
                Vector128<byte> x0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0, 16));
                Vector128<byte> x1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(16, 16));
                Vector128<byte> x2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(32, 16));
                Vector128<byte> x3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(48, 16));
                Vector128<byte> y0 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0, 16));
                Vector128<byte> y1 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(16, 16));
                Vector128<byte> y2 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(32, 16));
                Vector128<byte> y3 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(48, 16));

                Vector128<byte> z0 = System.Runtime.Intrinsics.X86.Sse2.Xor(x0, y0);
                Vector128<byte> z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(x1, y1);
                Vector128<byte> z2 = System.Runtime.Intrinsics.X86.Sse2.Xor(x2, y2);
                Vector128<byte> z3 = System.Runtime.Intrinsics.X86.Sse2.Xor(x3, y3);

                MemoryMarshal.Write(zBytes.Slice(0, 16), ref z0);
                MemoryMarshal.Write(zBytes.Slice(16, 16), ref z1);
                MemoryMarshal.Write(zBytes.Slice(32, 16), ref z2);
                MemoryMarshal.Write(zBytes.Slice(48, 16), ref z3);
                return;
            }

            // Scalar fallback, four elements per iteration.
            for (int k = 0; k < 8; k += 4)
            {
                z[k] = x[k] ^ y[k];
                z[k + 1] = x[k + 1] ^ y[k + 1];
                z[k + 2] = x[k + 2] ^ y[k + 2];
                z[k + 3] = x[k + 3] ^ y[k + 3];
            }
        }

        /// <summary>
        /// Array overload of <see cref="XorTo64(ReadOnlySpan{ulong}, Span{ulong})"/>;
        /// each span starts at the given offset and runs to the end of its array.
        /// </summary>
        public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff)
        {
            XorTo64(x.AsSpan(xOff), z.AsSpan(zOff));
        }

        /// <summary>
        /// XORs the first 8 elements of <paramref name="x"/> into <paramref name="z"/> in place
        /// (<c>z[i] ^= x[i]</c>).
        /// </summary>
        /// <remarks>
        /// Uses two 256-bit XORs when AVX2 is enabled, four 128-bit XORs when SSE2 is enabled
        /// (both also require <c>Vector.IsPacked</c>), and a scalar loop otherwise.
        /// </remarks>
        public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));

                // The current contents of z are loaded before being overwritten.
                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0, 32));
                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(32, 32));
                Vector256<byte> zLo = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0, 32));
                Vector256<byte> zHi = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(32, 32));

                Vector256<byte> outLo = System.Runtime.Intrinsics.X86.Avx2.Xor(xLo, zLo);
                Vector256<byte> outHi = System.Runtime.Intrinsics.X86.Avx2.Xor(xHi, zHi);

                MemoryMarshal.Write(zBytes.Slice(0, 32), ref outLo);
                MemoryMarshal.Write(zBytes.Slice(32, 32), ref outHi);
                return;
            }

            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));

                // The current contents of z are loaded before being overwritten.
                Vector128<byte> x0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0, 16));
                Vector128<byte> x1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(16, 16));
                Vector128<byte> x2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(32, 16));
                Vector128<byte> x3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(48, 16));
                Vector128<byte> z0 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0, 16));
                Vector128<byte> z1 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(16, 16));
                Vector128<byte> z2 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(32, 16));
                Vector128<byte> z3 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(48, 16));

                Vector128<byte> out0 = System.Runtime.Intrinsics.X86.Sse2.Xor(x0, z0);
                Vector128<byte> out1 = System.Runtime.Intrinsics.X86.Sse2.Xor(x1, z1);
                Vector128<byte> out2 = System.Runtime.Intrinsics.X86.Sse2.Xor(x2, z2);
                Vector128<byte> out3 = System.Runtime.Intrinsics.X86.Sse2.Xor(x3, z3);

                MemoryMarshal.Write(zBytes.Slice(0, 16), ref out0);
                MemoryMarshal.Write(zBytes.Slice(16, 16), ref out1);
                MemoryMarshal.Write(zBytes.Slice(32, 16), ref out2);
                MemoryMarshal.Write(zBytes.Slice(48, 16), ref out3);
                return;
            }

            // Scalar fallback, four elements per iteration.
            for (int k = 0; k < 8; k += 4)
            {
                z[k] ^= x[k];
                z[k + 1] ^= x[k + 1];
                z[k + 2] ^= x[k + 2];
                z[k + 3] ^= x[k + 3];
            }
        }
    }
}