// SecT283Field
using Org.BouncyCastle.Math.Raw;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Math.EC.Custom.Sec
{
internal static class SecT283Field
{
// Mask selecting the low 27 bits of a word (2^27 - 1). The same value is applied to word 4 by Reduce and Reduce37.
private const ulong M27 = 134217727;
// Mask selecting the low 57 bits of a word (2^57 - 1). The same value bounds the limbs written by ImplExpand and ImplMulw.
private const ulong M57 = 144115188075855871;
// Five-word constant that Sqrt multiplies with the values returned by Interleave.Unshuffle.
// NOTE(review): presumably the field square root of z (hence the name) - confirm against the reference implementation.
private static readonly ulong[] ROOT_Z = new ulong[5] {
878416384462358536,
3513665537849438403,
9369774767598502668,
585610922974906400,
34087042
};
/// <summary>
/// Writes the word-wise XOR of <paramref name="x"/> and <paramref name="y"/> into
/// <paramref name="z"/>, covering the five 64-bit words of a field element.
/// </summary>
/// <param name="x">First operand; words 0..4 are read.</param>
/// <param name="y">Second operand; words 0..4 are read.</param>
/// <param name="z">Destination; words 0..4 are overwritten.</param>
public static void Add(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 5; ++i)
    {
        z[i] = x[i] ^ y[i];
    }
}
/// <summary>
/// XORs both <paramref name="x"/> and <paramref name="y"/> into <paramref name="z"/>
/// (z ^= x ^ y) over the five 64-bit words of a field element.
/// </summary>
/// <param name="x">First addend; words 0..4 are read.</param>
/// <param name="y">Second addend; words 0..4 are read.</param>
/// <param name="z">Accumulator; words 0..4 are updated in place.</param>
public static void AddBothTo(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 5; ++i)
    {
        z[i] ^= x[i] ^ y[i];
    }
}
/// <summary>
/// Writes the word-wise XOR of two extended (unreduced) values into <paramref name="zz"/>.
/// Only words 0..8 are processed.
/// </summary>
/// <param name="xx">First operand; words 0..8 are read.</param>
/// <param name="yy">Second operand; words 0..8 are read.</param>
/// <param name="zz">Destination; words 0..8 are overwritten.</param>
public static void AddExt(ReadOnlySpan<ulong> xx, ReadOnlySpan<ulong> yy, Span<ulong> zz)
{
    for (int i = 0; i < 9; ++i)
    {
        zz[i] = xx[i] ^ yy[i];
    }
}
/// <summary>
/// Copies <paramref name="x"/> into <paramref name="z"/> with the lowest bit of word 0 flipped,
/// i.e. adds the constant 1 to a five-word field element.
/// </summary>
/// <param name="x">Source element; words 0..4 are read.</param>
/// <param name="z">Destination; words 0..4 are overwritten.</param>
public static void AddOne(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    z[0] = x[0] ^ 1UL;
    for (int i = 1; i < 5; ++i)
    {
        z[i] = x[i];
    }
}
/// <summary>
/// XORs <paramref name="x"/> into <paramref name="z"/> (z ^= x) over the five 64-bit words
/// of a field element.
/// </summary>
/// <param name="x">Addend; words 0..4 are read.</param>
/// <param name="z">Accumulator; words 0..4 are updated in place.</param>
public static void AddTo(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    for (int i = 0; i < 5; ++i)
    {
        z[i] ^= x[i];
    }
}
/// <summary>
/// Converts <paramref name="x"/> to an array of 64-bit words by delegating to
/// <c>Nat.FromBigInteger64</c> with a bit length of 283.
/// </summary>
/// <param name="x">The integer to convert.</param>
/// <returns>The word array produced by <c>Nat.FromBigInteger64(283, x)</c>.</returns>
public static ulong[] FromBigInteger(BigInteger x) => Nat.FromBigInteger64(283, x);
/// <summary>
/// Starting from z = x, repeats "square z twice, then add x" 141 times and leaves the
/// result in <paramref name="z"/>.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="z">Receives the result (five words).</param>
public static void HalfTrace(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // Scratch for the unreduced square; ImplSquare writes all nine words before Reduce reads them.
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[72], 9)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[9];

    Nat320.Copy64(x, z);

    // i takes the odd values 1, 3, ..., 281: 141 rounds of two squarings plus one addition.
    for (int i = 1; i < 283; i += 2)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
        ImplSquare(z, tt);
        Reduce(tt, z);
        AddTo(x, z);
    }
}
/// <summary>
/// Computes the multiplicative inverse of <paramref name="x"/> as x^(2^283 - 2) using a fixed
/// chain of squarings and multiplications.
/// </summary>
/// <param name="x">Element to invert (five words); must be non-zero.</param>
/// <param name="z">Receives the inverse (five words).</param>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="x"/> is zero.</exception>
public static void Invert(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    if (Nat320.IsZero64(x))
    {
        throw new InvalidOperationException();
    }

    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[40], 5)",
    // which is not valid C# and needlessly required an unsafe context.
    // Both temporaries are fully written before they are first read.
    Span<ulong> t0 = stackalloc ulong[5];
    Span<ulong> t1 = stackalloc ulong[5];

    // The trailing comments give the exponent e such that the named temporary holds x^e.
    Square(x, t0);
    Multiply(t0, x, t0);        // t0: 2^2 - 1
    SquareN(t0, 2, t1);
    Multiply(t1, t0, t1);       // t1: 2^4 - 1
    SquareN(t1, 4, t0);
    Multiply(t0, t1, t0);       // t0: 2^8 - 1
    SquareN(t0, 8, t1);
    Multiply(t1, t0, t1);       // t1: 2^16 - 1
    Square(t1, t1);
    Multiply(t1, x, t1);        // t1: 2^17 - 1
    SquareN(t1, 17, t0);
    Multiply(t0, t1, t0);       // t0: 2^34 - 1
    Square(t0, t0);
    Multiply(t0, x, t0);        // t0: 2^35 - 1
    SquareN(t0, 35, t1);
    Multiply(t1, t0, t1);       // t1: 2^70 - 1
    SquareN(t1, 70, t0);
    Multiply(t0, t1, t0);       // t0: 2^140 - 1
    Square(t0, t0);
    Multiply(t0, x, t0);        // t0: 2^141 - 1
    SquareN(t0, 141, t1);
    Multiply(t1, t0, t1);       // t1: 2^282 - 1
    Square(t1, z);              // z:  2^283 - 2
}
/// <summary>
/// Multiplies two field elements: forms the unreduced product with <c>ImplMultiply</c> and
/// reduces it into <paramref name="z"/> with <c>Reduce</c>.
/// </summary>
/// <param name="x">First factor (five words).</param>
/// <param name="y">Second factor (five words).</param>
/// <param name="z">Receives the reduced product (five words); may be the same span as an input,
/// because the product is built in a private scratch buffer first.</param>
public static void Multiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[80], 10)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[10];
    ImplMultiply(x, y, tt);
    Reduce(tt, z);
}
/// <summary>
/// Forms the unreduced product of <paramref name="x"/> and <paramref name="y"/> and XORs it
/// into the extended accumulator <paramref name="zz"/> via <c>AddExt</c> (words 0..8).
/// </summary>
/// <param name="x">First factor (five words).</param>
/// <param name="y">Second factor (five words).</param>
/// <param name="zz">Extended accumulator, updated in place.</param>
public static void MultiplyAddToExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[80], 10)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[10];
    ImplMultiply(x, y, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced product of <paramref name="x"/> and <paramref name="y"/> into
/// <paramref name="zz"/>. The first ten words of <paramref name="zz"/> are zeroed before
/// <c>ImplMultiply</c> runs.
/// </summary>
/// <param name="x">First factor (five words).</param>
/// <param name="y">Second factor (five words).</param>
/// <param name="zz">Destination; must hold at least ten words.</param>
public static void MultiplyExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    Span<ulong> product = zz.Slice(0, 10);
    product.Clear();
    ImplMultiply(x, y, zz);
}
/// <summary>
/// Folds a nine-word extended value down to five words. Each of words 8..5 is XORed back in
/// at bit offsets 37, 42, 44 and 49 of the word five positions lower (spilling into the next
/// word), then the bits of word 4 above bit 26 are folded into word 0.
/// </summary>
/// <param name="xx">Extended input; words 0..8 are read before anything is written.</param>
/// <param name="z">Receives the reduced value (five words, word 4 limited to 27 bits).</param>
public static void Reduce(ReadOnlySpan<ulong> xx, Span<ulong> z)
{
    const ulong Mask27 = (1UL << 27) - 1UL;

    ulong w0 = xx[0], w1 = xx[1], w2 = xx[2], w3 = xx[3], w4 = xx[4];
    ulong w5 = xx[5], w6 = xx[6], w7 = xx[7], w8 = xx[8];

    // Fold from the top down so every folded word already includes the carries from above.
    w3 ^= (w8 << 37) ^ (w8 << 42) ^ (w8 << 44) ^ (w8 << 49);
    w4 ^= (w8 >> 27) ^ (w8 >> 22) ^ (w8 >> 20) ^ (w8 >> 15);

    w2 ^= (w7 << 37) ^ (w7 << 42) ^ (w7 << 44) ^ (w7 << 49);
    w3 ^= (w7 >> 27) ^ (w7 >> 22) ^ (w7 >> 20) ^ (w7 >> 15);

    w1 ^= (w6 << 37) ^ (w6 << 42) ^ (w6 << 44) ^ (w6 << 49);
    w2 ^= (w6 >> 27) ^ (w6 >> 22) ^ (w6 >> 20) ^ (w6 >> 15);

    w0 ^= (w5 << 37) ^ (w5 << 42) ^ (w5 << 44) ^ (w5 << 49);
    w1 ^= (w5 >> 27) ^ (w5 >> 22) ^ (w5 >> 20) ^ (w5 >> 15);

    // Whatever remains above bit 26 of word 4 is folded into word 0.
    ulong overflow = w4 >> 27;

    z[0] = w0 ^ overflow ^ (overflow << 5) ^ (overflow << 7) ^ (overflow << 12);
    z[1] = w1;
    z[2] = w2;
    z[3] = w3;
    z[4] = w4 & Mask27;
}
/// <summary>
/// Partially reduces the five-word element stored at <paramref name="zOff"/> in
/// <paramref name="z"/>: the bits of word 4 above bit 26 are folded into word 0 and then
/// cleared from word 4.
/// </summary>
/// <param name="z">Array holding the element; modified in place.</param>
/// <param name="zOff">Index of the element's first word.</param>
public static void Reduce37(ulong[] z, int zOff)
{
    int topIndex = zOff + 4;
    ulong top = z[topIndex];
    ulong overflow = top >> 27;

    z[zOff] ^= overflow ^ (overflow << 5) ^ (overflow << 7) ^ (overflow << 12);
    z[topIndex] = top & ((1UL << 27) - 1UL);
}
/// <summary>
/// Computes the field square root of <paramref name="x"/> into <paramref name="z"/>: the input
/// is split with <c>Interleave.Unshuffle</c>, the returned halves are multiplied by
/// <c>ROOT_Z</c>, and the <c>out</c> halves are XORed onto the product.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="z">Receives the result (five words).</param>
public static void Sqrt(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[40], 5)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> odd = stackalloc ulong[5];

    // Only words 0..2 are assigned below, but all five are multiplied; zero the rest
    // explicitly rather than relying on the initial contents of stack memory.
    odd.Clear();

    odd[0] = Interleave.Unshuffle(x[0], x[1], out ulong even0);
    odd[1] = Interleave.Unshuffle(x[2], x[3], out ulong even1);
    odd[2] = Interleave.Unshuffle(x[4], out ulong even2);

    Multiply(odd, ROOT_Z, z);

    z[0] ^= even0;
    z[1] ^= even1;
    z[2] ^= even2;
}
/// <summary>
/// Squares a field element: expands it with <c>ImplSquare</c> and reduces the nine-word
/// result into <paramref name="z"/>.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="z">Receives the reduced square (five words); may be the same span as
/// <paramref name="x"/>, because the square is built in a private scratch buffer first.</param>
public static void Square(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[72], 9)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[9];
    ImplSquare(x, tt);
    Reduce(tt, z);
}
/// <summary>
/// Forms the unreduced square of <paramref name="x"/> and XORs it into the extended
/// accumulator <paramref name="zz"/> via <c>AddExt</c> (words 0..8).
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="zz">Extended accumulator, updated in place.</param>
public static void SquareAddToExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[72], 9)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[9];
    ImplSquare(x, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced square of <paramref name="x"/> into <paramref name="zz"/> by
/// forwarding to <c>ImplSquare</c>.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="zz">Destination for the nine-word extended result.</param>
public static void SquareExt(ReadOnlySpan<ulong> x, Span<ulong> zz) => ImplSquare(x, zz);
/// <summary>
/// Squares <paramref name="x"/> repeatedly, leaving the result in <paramref name="z"/>.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="n">Number of squarings. One squaring is always performed, so values
/// of 1 or less behave like a single <c>Square</c>.</param>
/// <param name="z">Receives the result (five words).</param>
public static void SquareN(ReadOnlySpan<ulong> x, int n, Span<ulong> z)
{
    // Typed stackalloc replaces the decompiler's "new Span<ulong>(stackalloc byte[72], 9)",
    // which is not valid C# and needlessly required an unsafe context.
    Span<ulong> tt = stackalloc ulong[9];

    // First squaring reads x; the remaining n - 1 operate on z in place.
    ImplSquare(x, tt);
    Reduce(tt, z);

    while (--n > 0)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
    }
}
/// <summary>
/// Returns the XOR of bit 0 of word 0 and bit 15 of word 4 of <paramref name="x"/>
/// (bits 0 and 271 of the element).
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <returns>0 or 1.</returns>
public static uint Trace(ReadOnlySpan<ulong> x)
{
    ulong mixed = x[0] ^ (x[4] >> 15);
    return (uint)(mixed & 1UL);
}
/// <summary>
/// Repacks ten limbs spaced 57 bits apart into contiguous 64-bit words, in place:
/// output word i combines limb i shifted right by 7*i with limb i+1 shifted left by 57 - 7*i.
/// </summary>
/// <param name="zz">Ten words, rewritten in place.</param>
private static void ImplCompactExt(Span<ulong> zz)
{
    // Reading the last limb first means a too-short span fails before any word is modified.
    ulong last = zz[9];

    // Each step reads only words i and i + 1, neither of which has been rewritten yet.
    for (int i = 0; i < 8; ++i)
    {
        int shift = 7 * i;
        zz[i] = (zz[i] >> shift) ^ (zz[i + 1] << (57 - shift));
    }

    zz[8] = (zz[8] >> 56) ^ (last << 1);
    zz[9] = last >> 63;
}
/// <summary>
/// Splits five 64-bit words into five limbs taken at 57-bit intervals. Limbs 0..3 are masked
/// to 57 bits; limb 4 receives the remaining high bits unmasked.
/// </summary>
/// <param name="x">Source words 0..4, all read before anything is written.</param>
/// <param name="z">Destination limbs 0..4.</param>
private static void ImplExpand(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    const ulong Mask57 = (1UL << 57) - 1UL;

    ulong w0 = x[0], w1 = x[1], w2 = x[2], w3 = x[3], w4 = x[4];

    z[0] = w0 & Mask57;
    z[1] = ((w0 >> 57) ^ (w1 << 7)) & Mask57;
    z[2] = ((w1 >> 50) ^ (w2 << 14)) & Mask57;
    z[3] = ((w2 >> 43) ^ (w3 << 21)) & Mask57;
    z[4] = (w3 >> 36) ^ (w4 << 28);
}
/// <summary>
/// Computes the unreduced carry-less product of two five-word operands into
/// <paramref name="zz"/> (ten words).
/// </summary>
/// <remarks>
/// When the PCLMULQDQ path is enabled, the 64x64 word products are formed with carry-less
/// multiply instructions and combined by column. Otherwise both operands are re-split into
/// 57-bit limbs, 13 limb products are computed with <c>ImplMulw</c>, combined into ten result
/// limbs and repacked by <c>ImplCompactExt</c>.
/// </remarks>
/// <param name="x">First factor (five words).</param>
/// <param name="y">Second factor (five words).</param>
/// <param name="zz">Receives the ten-word product. On the fallback path its first eight words
/// are also used as scratch while the limb products are being computed.</param>
private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && Vector.IsPackedLittleEndian)
    {
        Vector128<ulong> x01 = Vector128.Create(x[0], x[1]);
        Vector128<ulong> x23 = Vector128.Create(x[2], x[3]);
        Vector128<ulong> x4 = Vector128.CreateScalar(x[4]);
        Vector128<ulong> y01 = Vector128.Create(y[0], y[1]);
        Vector128<ulong> y23 = Vector128.Create(y[2], y[3]);
        Vector128<ulong> y4 = Vector128.CreateScalar(y[4]);

        // zNM accumulates every product x[i] * y[j] with i + j == N; its 128 bits cover output
        // words N and M. In the control byte, bit 0 picks the upper half of the left operand
        // and bit 4 the upper half of the right operand (PCLMULQDQ immediate encoding).
        Vector128<ulong> z01 =
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y01, 0x00);

        Vector128<ulong> z12 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y01, 0x01),
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y01, 0x10));

        Vector128<ulong> z23 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y23, 0x00),
            System.Runtime.Intrinsics.X86.Sse2.Xor(
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y01, 0x11),
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y01, 0x00)));

        Vector128<ulong> z34 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y23, 0x01),
            System.Runtime.Intrinsics.X86.Sse2.Xor(
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y23, 0x10),
                System.Runtime.Intrinsics.X86.Sse2.Xor(
                    System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y01, 0x01),
                    System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y01, 0x10))));

        Vector128<ulong> z45 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y4, 0x00),
            System.Runtime.Intrinsics.X86.Sse2.Xor(
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y23, 0x11),
                System.Runtime.Intrinsics.X86.Sse2.Xor(
                    System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y23, 0x00),
                    System.Runtime.Intrinsics.X86.Sse2.Xor(
                        System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y01, 0x11),
                        System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x4, y01, 0x00)))));

        Vector128<ulong> z56 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x01, y4, 0x01),
            System.Runtime.Intrinsics.X86.Sse2.Xor(
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y23, 0x01),
                System.Runtime.Intrinsics.X86.Sse2.Xor(
                    System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y23, 0x10),
                    System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x4, y01, 0x10))));

        Vector128<ulong> z67 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y4, 0x00),
            System.Runtime.Intrinsics.X86.Sse2.Xor(
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y23, 0x11),
                System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x4, y23, 0x00)));

        Vector128<ulong> z78 = System.Runtime.Intrinsics.X86.Sse2.Xor(
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x23, y4, 0x01),
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x4, y23, 0x10));

        Vector128<ulong> z89 =
            System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(x4, y4, 0x00);

        // The odd-aligned sums straddle two even-aligned vectors: their low word goes to the
        // upper half of the vector below, their high word to the lower half of the vector above.
        z01 = System.Runtime.Intrinsics.X86.Sse2.Xor(z01, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(z12, 8));
        z23 = System.Runtime.Intrinsics.X86.Sse2.Xor(z23, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(z12, 8));
        z23 = System.Runtime.Intrinsics.X86.Sse2.Xor(z23, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(z34, 8));
        z45 = System.Runtime.Intrinsics.X86.Sse2.Xor(z45, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(z34, 8));
        z45 = System.Runtime.Intrinsics.X86.Sse2.Xor(z45, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(z56, 8));
        z67 = System.Runtime.Intrinsics.X86.Sse2.Xor(z67, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(z56, 8));
        z67 = System.Runtime.Intrinsics.X86.Sse2.Xor(z67, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(z78, 8));
        z89 = System.Runtime.Intrinsics.X86.Sse2.Xor(z89, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(z78, 8));

        Span<byte> zzBytes = MemoryMarshal.AsBytes(zz);
        MemoryMarshal.Write(zzBytes.Slice(0, 16), ref z01);
        MemoryMarshal.Write(zzBytes.Slice(16, 16), ref z23);
        MemoryMarshal.Write(zzBytes.Slice(32, 16), ref z45);
        MemoryMarshal.Write(zzBytes.Slice(48, 16), ref z67);
        MemoryMarshal.Write(zzBytes.Slice(64, 16), ref z89);
        return;
    }

    // Portable path. Stack buffers replace the two heap arrays; typed stackalloc replaces the
    // decompiler's "new Span<ulong>(stackalloc byte[208], 26)", which is not valid C#.
    Span<ulong> a = stackalloc ulong[5];
    Span<ulong> b = stackalloc ulong[5];
    ImplExpand(x, a);
    ImplExpand(y, b);

    // zz doubles as the eight-entry multiples table used by ImplMulw. Entry 0 must be zero and
    // is never written by the table setup, so pin it here instead of trusting the caller.
    Span<ulong> u = zz;
    u[0] = 0UL;

    // p[2k], p[2k+1] hold the low/high limb of the k-th limb product. Start from zero so the
    // result does not depend on the initial contents of stack memory.
    Span<ulong> p = stackalloc ulong[26];
    p.Clear();

    ImplMulw(u, a[0], b[0], p);
    ImplMulw(u, a[1], b[1], p.Slice(2));
    ImplMulw(u, a[2], b[2], p.Slice(4));
    ImplMulw(u, a[3], b[3], p.Slice(6));
    ImplMulw(u, a[4], b[4], p.Slice(8));

    ulong a01 = a[0] ^ a[1], b01 = b[0] ^ b[1];
    ulong a02 = a[0] ^ a[2], b02 = b[0] ^ b[2];
    ulong a24 = a[2] ^ a[4], b24 = b[2] ^ b[4];
    ulong a34 = a[3] ^ a[4], b34 = b[3] ^ b[4];

    ImplMulw(u, a02 ^ a[3], b02 ^ b[3], p.Slice(18));
    ImplMulw(u, a24 ^ a[1], b24 ^ b[1], p.Slice(20));

    ulong a0134 = a01 ^ a34, b0134 = b01 ^ b34;
    ulong aAll = a0134 ^ a[2], bAll = b0134 ^ b[2];

    ImplMulw(u, a0134, b0134, p.Slice(22));
    ImplMulw(u, aAll, bAll, p.Slice(24));

    ImplMulw(u, a01, b01, p.Slice(10));
    ImplMulw(u, a02, b02, p.Slice(12));
    ImplMulw(u, a24, b24, p.Slice(14));
    ImplMulw(u, a34, b34, p.Slice(16));

    // From here on zz is only written with results; the scratch role of u is over.
    zz[0] = p[0];
    zz[9] = p[9];

    ulong s1 = p[0] ^ p[1];
    ulong s2 = s1 ^ p[2];
    ulong s3 = s2 ^ p[10];
    zz[1] = s3;

    ulong s4 = p[3] ^ p[4];
    ulong s5 = p[11] ^ p[12];
    ulong s6 = s4 ^ s5;
    ulong s7 = s2 ^ s6;
    zz[2] = s7;

    ulong s8 = s1 ^ s4;
    ulong s9 = p[5] ^ p[6];
    ulong s10 = s8 ^ s9 ^ p[8];
    ulong s11 = p[13] ^ p[14];
    ulong s12 = s10 ^ s11;
    ulong s13 = p[18] ^ p[22] ^ p[24];
    ulong s14 = s12 ^ s13;
    zz[3] = s14;

    ulong s15 = p[7] ^ p[8] ^ p[9];
    ulong s16 = s15 ^ p[17];
    zz[8] = s16;

    ulong s17 = s15 ^ s9;
    ulong s18 = p[15] ^ p[16];
    ulong s19 = s17 ^ s18;
    zz[7] = s19;

    ulong s20 = s19 ^ s3;
    ulong s21 = p[19] ^ p[20];
    ulong s22 = p[25] ^ p[24];
    ulong s23 = p[18] ^ p[23];
    ulong s24 = s21 ^ s22;
    ulong s25 = s24 ^ s23 ^ s20;
    zz[4] = s25;

    ulong s26 = s7 ^ s16;
    ulong s27 = s24 ^ s26;
    ulong s28 = p[21] ^ p[22];
    ulong s29 = s27 ^ s28;
    zz[5] = s29;

    ulong s30 = s10 ^ p[0] ^ p[9] ^ s11 ^ p[21] ^ p[23] ^ p[25];
    zz[6] = s30;

    // Repack the ten 57-bit-spaced limbs into contiguous 64-bit words.
    ImplCompactExt(zz);
}
/// <summary>
/// Carry-less multiplication of two limbs of at most 57 bits each. The product is stored as
/// two limbs: <c>z[0]</c> receives its low 57 bits and <c>z[1]</c> the remaining high bits.
/// </summary>
/// <param name="u">Scratch of at least eight words; overwritten with the carry-less multiples
/// 0..7 of <paramref name="y"/>.</param>
/// <param name="x">First factor; bits 0..56 are consumed.</param>
/// <param name="y">Second factor; bits above 56 are not handled.</param>
/// <param name="z">Destination; words 0 and 1 are overwritten.</param>
private static void ImplMulw(Span<ulong> u, ulong x, ulong y, Span<ulong> z)
{
    // Table of y times each 3-bit polynomial. Entry 0 is written explicitly so the lookup
    // never depends on what the caller left in the scratch buffer.
    u[0] = 0UL;
    u[1] = y;
    u[2] = u[1] << 1;
    u[3] = u[2] ^ y;
    u[4] = u[2] << 1;
    u[5] = u[4] ^ y;
    u[6] = u[3] << 1;
    u[7] = u[6] ^ y;

    uint j = (uint)x;
    ulong h = 0;
    ulong l = u[(int)(j & 7)];      // bits 0..2 of x
    int k = 48;
    do
    {
        // Consume nine bits of x per round (three table lookups), at bit offsets 48, 39, ..., 3.
        j = (uint)(x >> k);
        ulong g = u[(int)(j & 7)] ^ (u[(int)((j >> 3) & 7)] << 3) ^ (u[(int)((j >> 6) & 7)] << 6);
        l ^= g << k;
        h ^= g >> -k;               // shift count is taken mod 64, i.e. 64 - k
    }
    while ((k -= 9) > 0);

    // Bit 56 of y is shifted out of the 64-bit table entry whenever the top 3-bit group of a
    // round has its high bit set (x bits 11, 20, ..., 56). Add that contribution back into h.
    h ^= (ulong)((long)(x & 72198606942111744UL) & ((long)(y << 7) >> 63)) >> 8;

    // Store rather than XOR-accumulate: each call yields one complete product, and the
    // destination must not need to be pre-zeroed.
    z[0] = l & ((1UL << 57) - 1UL);
    z[1] = (l >> 57) ^ (h << 7);
}
/// <summary>
/// Writes the nine-word unreduced square of <paramref name="x"/> into <paramref name="zz"/>.
/// Word 8 comes from <c>Interleave.Expand32to64</c> applied to the low 32 bits of
/// <c>x[4]</c>; words 0..7 come from the BMI2 bit-deposit path when it is enabled, or from
/// <c>Interleave.Expand64To128</c> otherwise.
/// </summary>
/// <param name="x">Input element (five words).</param>
/// <param name="zz">Destination; words 0..8 are written.</param>
private static void ImplSquare(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // Deposit mask with every even-numbered bit set.
    const ulong EvenBits = 0x5555555555555555UL;

    zz[8] = Interleave.Expand32to64((uint)x[4]);

    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
    {
        Interleave.Expand64To128(x.Slice(0, 4), zz.Slice(0, 8));
        return;
    }

    // Highest word first; each source word spreads into two output words.
    for (int i = 3; i >= 0; --i)
    {
        zz[2 * i + 1] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x[i] >> 32, EvenBits);
        zz[2 * i] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x[i], EvenBits);
    }
}
}
}