// SecT233Field
using Org.BouncyCastle.Math.Raw;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Math.EC.Custom.Sec
{
internal static class SecT233Field
{
// Mask with the low 41 bits set (2^41 - 1); the same value is used as a mask in Reduce and Reduce23.
private const ulong M41 = 2199023255551;
// Mask with the low 59 bits set (2^59 - 1); the same value is used as a mask in ImplExpand and ImplMulwAcc.
private const ulong M59 = 576460752303423487;
/// <summary>
/// Stores the word-wise XOR of the first four words of <paramref name="x"/> and
/// <paramref name="y"/> into the first four words of <paramref name="z"/>.
/// </summary>
/// <param name="x">First operand; words 0..3 are read.</param>
/// <param name="y">Second operand; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void Add(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 4; ++i)
    {
        z[i] = x[i] ^ y[i];
    }
}
/// <summary>
/// XORs both <paramref name="x"/> and <paramref name="y"/> (words 0..3) into the
/// first four words of <paramref name="z"/>.
/// </summary>
/// <param name="x">First operand; words 0..3 are read.</param>
/// <param name="y">Second operand; words 0..3 are read.</param>
/// <param name="z">Accumulator; words 0..3 are updated in place.</param>
public static void AddBothTo(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 4; ++i)
    {
        z[i] ^= x[i] ^ y[i];
    }
}
/// <summary>
/// Stores the word-wise XOR of the first eight words of <paramref name="xx"/> and
/// <paramref name="yy"/> into the first eight words of <paramref name="zz"/>.
/// </summary>
/// <param name="xx">First operand; words 0..7 are read.</param>
/// <param name="yy">Second operand; words 0..7 are read.</param>
/// <param name="zz">Destination; words 0..7 are overwritten.</param>
public static void AddExt(ReadOnlySpan<ulong> xx, ReadOnlySpan<ulong> yy, Span<ulong> zz)
{
    for (int i = 0; i < 8; ++i)
    {
        zz[i] = xx[i] ^ yy[i];
    }
}
/// <summary>
/// Copies words 0..3 of <paramref name="x"/> into <paramref name="z"/> with the
/// lowest bit of word 0 flipped.
/// </summary>
/// <param name="x">Source; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void AddOne(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    z[0] = x[0] ^ 1UL;
    for (int i = 1; i < 4; ++i)
    {
        z[i] = x[i];
    }
}
/// <summary>
/// XORs words 0..3 of <paramref name="x"/> into the first four words of <paramref name="z"/>.
/// </summary>
/// <param name="x">Operand; words 0..3 are read.</param>
/// <param name="z">Accumulator; words 0..3 are updated in place.</param>
public static void AddTo(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    for (int i = 0; i < 4; ++i)
    {
        z[i] ^= x[i];
    }
}
/// <summary>
/// Converts <paramref name="x"/> to a <c>ulong</c> array by delegating to
/// <c>Nat.FromBigInteger64</c> with a bit length of 233.
/// </summary>
/// <param name="x">The value to convert.</param>
/// <returns>The array produced by <c>Nat.FromBigInteger64(233, x)</c>.</returns>
public static ulong[] FromBigInteger(BigInteger x) => Nat.FromBigInteger64(233, x);
/// <summary>
/// Writes into <paramref name="z"/> the result of starting from <paramref name="x"/> and
/// repeating 116 times: square twice (with reduction), then add <paramref name="x"/>.
/// </summary>
/// <param name="x">Input element; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void HalfTrace(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // 8-word scratch for the unreduced square. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    Nat256.Copy64(x, z);

    // i takes the odd values 1, 3, ..., 231 (116 iterations).
    for (int i = 1; i < 233; i += 2)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
        ImplSquare(z, tt);
        Reduce(tt, z);
        AddTo(x, z);
    }
}
/// <summary>
/// Computes <paramref name="z"/> from <paramref name="x"/> with a fixed chain of squarings
/// and multiplications that raises <paramref name="x"/> to the power 2^233 - 2.
/// </summary>
/// <param name="x">Element to invert; must not be zero.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Nat256.IsZero64(x)</c> reports true.
/// </exception>
public static void Invert(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    if (Nat256.IsZero64(x))
        throw new InvalidOperationException();

    // Two 4-word temporaries. A nested `stackalloc` is typed Span<byte>, so it cannot
    // feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> t0 = stackalloc ulong[4];
    Span<ulong> t1 = stackalloc ulong[4];

    // Exponent bookkeeping below assumes Square / SquareN / Multiply are the field
    // operations defined in this class (each ends with Reduce).
    Square(x, t0);
    Multiply(t0, x, t0);        // x^(2^2 - 1)
    Square(t0, t0);
    Multiply(t0, x, t0);        // x^(2^3 - 1)
    SquareN(t0, 3, t1);
    Multiply(t1, t0, t1);       // x^(2^6 - 1)
    Square(t1, t1);
    Multiply(t1, x, t1);        // x^(2^7 - 1)
    SquareN(t1, 7, t0);
    Multiply(t0, t1, t0);       // x^(2^14 - 1)
    SquareN(t0, 14, t1);
    Multiply(t1, t0, t1);       // x^(2^28 - 1)
    Square(t1, t1);
    Multiply(t1, x, t1);        // x^(2^29 - 1)
    SquareN(t1, 29, t0);
    Multiply(t0, t1, t0);       // x^(2^58 - 1)
    SquareN(t0, 58, t1);
    Multiply(t1, t0, t1);       // x^(2^116 - 1)
    SquareN(t1, 116, t0);
    Multiply(t0, t1, t0);       // x^(2^232 - 1)
    Square(t0, z);              // x^(2^233 - 2)
}
/// <summary>
/// Multiplies <paramref name="x"/> by <paramref name="y"/> via <c>ImplMultiply</c> and
/// stores the result, reduced by <c>Reduce</c>, in <paramref name="z"/>.
/// </summary>
/// <param name="x">First factor; words 0..3 are read.</param>
/// <param name="y">Second factor; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void Multiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    // 8-word scratch for the unreduced product. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    // ImplMultiply's non-intrinsic path XOR-accumulates into its output, so it must start
    // from zero. MultiplyExt zeroes its buffer for the same reason; do not rely on the
    // (formally undefined) initial contents of stackalloc memory.
    tt.Clear();

    ImplMultiply(x, y, tt);
    Reduce(tt, z);
}
/// <summary>
/// Computes the unreduced 8-word product of <paramref name="x"/> and <paramref name="y"/>
/// via <c>ImplMultiply</c> and XORs it into <paramref name="zz"/>.
/// </summary>
/// <param name="x">First factor; words 0..3 are read.</param>
/// <param name="y">Second factor; words 0..3 are read.</param>
/// <param name="zz">Accumulator; words 0..7 are updated in place.</param>
public static void MultiplyAddToExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // 8-word scratch for the unreduced product. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    // ImplMultiply's non-intrinsic path XOR-accumulates into its output, so it must start
    // from zero; do not rely on the (formally undefined) initial stackalloc contents.
    tt.Clear();

    ImplMultiply(x, y, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Zeroes the first eight words of <paramref name="zz"/> and then has
/// <c>ImplMultiply</c> write the unreduced product of <paramref name="x"/> and
/// <paramref name="y"/> into it.
/// </summary>
/// <param name="x">First factor.</param>
/// <param name="y">Second factor.</param>
/// <param name="zz">Destination; at least eight words long.</param>
public static void MultiplyExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // Clear the product area first: ImplMultiply's fallback path XORs into its output.
    Span<ulong> productArea = zz.Slice(0, 8);
    productArea.Fill(0UL);

    ImplMultiply(x, y, zz);
}
/// <summary>
/// Folds the eight-word value <paramref name="xx"/> down to four words in
/// <paramref name="z"/>, leaving only the low 41 bits in the top word.
/// </summary>
/// <remarks>
/// Each of words 7, 6, 5 and 4 (in that order) is XORed into the three words four,
/// three and two positions below it using the shifts 23 / (41, 33) / 31. Bits 41 and
/// above of the resulting word 3 are then XORed into word 0 (unshifted) and word 1
/// (shifted left by 10).
/// </remarks>
/// <param name="xx">Input; words 0..7 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void Reduce(ReadOnlySpan<ulong> xx, Span<ulong> z)
{
    ulong x0 = xx[0], x1 = xx[1], x2 = xx[2], x3 = xx[3];
    ulong x4 = xx[4], x5 = xx[5], x6 = xx[6], x7 = xx[7];

    // Fold word 7 into words 3..5.
    x3 ^= x7 << 23;
    x4 ^= (x7 >> 41) ^ (x7 << 33);
    x5 ^= x7 >> 31;

    // Fold word 6 into words 2..4.
    x2 ^= x6 << 23;
    x3 ^= (x6 >> 41) ^ (x6 << 33);
    x4 ^= x6 >> 31;

    // Fold the (already updated) word 5 into words 1..3.
    x1 ^= x5 << 23;
    x2 ^= (x5 >> 41) ^ (x5 << 33);
    x3 ^= x5 >> 31;

    // Fold the (already updated) word 4 into words 0..2.
    x0 ^= x4 << 23;
    x1 ^= (x4 >> 41) ^ (x4 << 33);
    x2 ^= x4 >> 31;

    // Whatever sits above bit 40 of word 3 is folded once more.
    ulong overflow = x3 >> 41;
    z[0] = x0 ^ overflow;
    z[1] = x1 ^ (overflow << 10);
    z[2] = x2;
    z[3] = x3 & 0x1FFFFFFFFFFUL; // low 41 bits (2^41 - 1)
}
/// <summary>
/// Folds the bits at position 41 and above of <c>z[zOff + 3]</c> back into
/// <c>z[zOff]</c> (unshifted) and <c>z[zOff + 1]</c> (shifted left by 10), then masks
/// <c>z[zOff + 3]</c> to its low 41 bits.
/// </summary>
/// <param name="z">Array holding the four-word value to update in place.</param>
/// <param name="zOff">Index of the value's first word within <paramref name="z"/>.</param>
public static void Reduce23(ulong[] z, int zOff)
{
    ulong top = z[zOff + 3];
    ulong carry = top >> 41;

    z[zOff] ^= carry;
    z[zOff + 1] ^= carry << 10;
    z[zOff + 3] = top & 0x1FFFFFFFFFFUL; // low 41 bits (2^41 - 1)
}
/// <summary>
/// Computes <paramref name="z"/> from <paramref name="x"/> by unshuffling the input
/// words, accumulating shifted copies of one half at bit offsets 0, 32, 117 and 191,
/// reducing that sum, and finally XORing in the other half.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Interleave.Unshuffle</c> is defined elsewhere; presumably its return
/// value carries the odd-indexed bits and its out parameter the even-indexed bits of the
/// two input words — confirm against that helper.
/// </remarks>
/// <param name="x">Input element; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void Sqrt(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    ulong c0 = Interleave.Unshuffle(x[0], x[1], out ulong e0);
    ulong c1 = Interleave.Unshuffle(x[2], x[3], out ulong e1);

    // Add a copy of (c1:c0) shifted left by 37 bits; c2 receives the bits that spill
    // past the second word. Order matters: c2 and c1 use the values from before the update.
    ulong c2 = c1 >> 27;
    c1 ^= (c0 >> 27) | (c1 << 37);
    c0 ^= c0 << 37;

    // Accumulator for the shifted copies. A nested `stackalloc` is typed Span<byte>, so it
    // cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly and zero
    // them explicitly because the loop below only XORs into the buffer.
    Span<ulong> tt = stackalloc ulong[8];
    tt.Clear();

    // Bit offsets at which (c2:c1:c0) is accumulated into tt.
    ReadOnlySpan<int> shifts = stackalloc int[] { 32, 117, 191 };

    for (int i = 0; i < shifts.Length; ++i)
    {
        int w = shifts[i] >> 6;     // word offset
        int s = shifts[i] & 63;     // bit offset inside the word (non-zero for all three entries)

        // For a ulong, `>> -s` shifts by (64 - s) since only the low 6 bits of the count are used.
        tt[w] ^= c0 << s;
        tt[w + 1] ^= (c1 << s) | (c0 >> -s);
        tt[w + 2] ^= (c2 << s) | (c1 >> -s);
        tt[w + 3] ^= c2 >> -s;
    }

    Reduce(tt, z);

    z[0] ^= e0;
    z[1] ^= e1;
}
/// <summary>
/// Squares <paramref name="x"/> via <c>ImplSquare</c> and stores the result, reduced by
/// <c>Reduce</c>, in <paramref name="z"/>.
/// </summary>
/// <param name="x">Input element; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void Square(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // 8-word scratch for the unreduced square. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    ImplSquare(x, tt);
    Reduce(tt, z);
}
/// <summary>
/// Computes the unreduced 8-word square of <paramref name="x"/> via <c>ImplSquare</c>
/// and XORs it into <paramref name="zz"/>.
/// </summary>
/// <param name="x">Input element; words 0..3 are read.</param>
/// <param name="zz">Accumulator; words 0..7 are updated in place.</param>
public static void SquareAddToExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // 8-word scratch for the unreduced square. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    ImplSquare(x, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced square of <paramref name="x"/> into <paramref name="zz"/> by
/// forwarding directly to <c>ImplSquare</c>.
/// </summary>
/// <param name="x">Input element.</param>
/// <param name="zz">Destination for the unreduced result.</param>
public static void SquareExt(ReadOnlySpan<ulong> x, Span<ulong> zz) => ImplSquare(x, zz);
/// <summary>
/// Squares <paramref name="x"/> repeatedly, reducing after every squaring, and stores
/// the result in <paramref name="z"/>.
/// </summary>
/// <remarks>
/// The first squaring is unconditional, so any <paramref name="n"/> of 1 or less
/// still performs exactly one squaring.
/// </remarks>
/// <param name="x">Input element; words 0..3 are read.</param>
/// <param name="n">Number of squarings to perform (expected to be at least 1).</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
public static void SquareN(ReadOnlySpan<ulong> x, int n, Span<ulong> z)
{
    // 8-word scratch for the unreduced square. A nested `stackalloc` is typed Span<byte>,
    // so it cannot feed a Span<ulong>(void*, int) constructor; allocate ulongs directly.
    Span<ulong> tt = stackalloc ulong[8];

    ImplSquare(x, tt);
    Reduce(tt, z);

    while (--n > 0)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
    }
}
/// <summary>
/// Returns the XOR of bit 0 of <c>x[0]</c> and bit 31 of <c>x[2]</c>.
/// </summary>
/// <param name="x">Input element; words 0 and 2 are read.</param>
/// <returns>0 or 1.</returns>
public static uint Trace(ReadOnlySpan<ulong> x)
{
    ulong folded = x[0] ^ (x[2] >> 31);
    return (uint)(folded & 1UL);
}
/// <summary>
/// Repacks <paramref name="zz"/> in place: output word i becomes input word i shifted
/// right by 5*i bits, XORed with input word i+1 shifted left by 59 - 5*i bits
/// (i = 0..6); output word 7 is input word 7 shifted right by 35.
/// </summary>
/// <param name="zz">Eight-word buffer, rewritten in place.</param>
private static void ImplCompactExt(Span<ulong> zz)
{
    // Reading the top word first means a span shorter than eight words still fails
    // before any element has been modified.
    ulong top = zz[7];

    // Ascending order is safe in place: step i only reads words i and i + 1,
    // neither of which has been overwritten yet.
    for (int i = 0; i < 7; ++i)
    {
        int shift = 5 * i;
        zz[i] = (zz[i] >> shift) ^ (zz[i + 1] << (59 - shift));
    }

    zz[7] = top >> 35;
}
/// <summary>
/// Re-slices the four 64-bit words of <paramref name="x"/> into 59-bit pieces: output
/// words 0..2 each receive 59 consecutive bits (masked), and output word 3 receives
/// the remaining bits without masking.
/// </summary>
/// <param name="x">Input; words 0..3 are read.</param>
/// <param name="z">Destination; words 0..3 are overwritten.</param>
private static void ImplExpand(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    const ulong Low59 = 0x07FFFFFFFFFFFFFFUL; // 2^59 - 1

    ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];

    z[0] = x0 & Low59;
    z[1] = ((x0 >> 59) ^ (x1 << 5)) & Low59;
    z[2] = ((x1 >> 54) ^ (x2 << 10)) & Low59;
    z[3] = (x2 >> 49) ^ (x3 << 15);
}
/// <summary>
/// Computes the unreduced carry-less product of the four-word operands
/// <paramref name="x"/> and <paramref name="y"/> into the eight words of <paramref name="zz"/>.
/// </summary>
/// <remarks>
/// When the PCLMULQDQ path is enabled, <paramref name="zz"/> is overwritten. Otherwise the
/// table-driven fallback XOR-accumulates into <paramref name="zz"/>, so the caller must
/// pass a zeroed buffer.
/// </remarks>
/// <param name="x">First factor; words 0..3 are read.</param>
/// <param name="y">Second factor; words 0..3 are read.</param>
/// <param name="zz">Destination; eight words.</param>
private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && Vector.IsPackedLittleEndian)
    {
        // Split each operand into a low and a high 128-bit half.
        Vector128<ulong> left = Vector128.Create(x[0], x[1]);
        Vector128<ulong> vector = Vector128.Create(x[2], x[3]);
        Vector128<ulong> vector2 = Vector128.Create(y[0], y[1]);
        Vector128<ulong> right = Vector128.Create(y[2], y[3]);

        // Sums of the halves, used for the middle term.
        Vector128<ulong> left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left, vector);
        Vector128<ulong> right2 = System.Runtime.Intrinsics.X86.Sse2.Xor(vector2, right);

        // low(x) * low(y): selectors 0 / 1 / 16 / 17 pick the lo*lo, cross and hi*hi
        // 64x64 partial products; the cross terms are split across (value, value3).
        Vector128<ulong> value = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, vector2, 0);
        Vector128<ulong> value2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, vector2, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, vector2, 16));
        Vector128<ulong> value3 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, vector2, 17);
        value = System.Runtime.Intrinsics.X86.Sse2.Xor(value, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value2, 8));
        value3 = System.Runtime.Intrinsics.X86.Sse2.Xor(value3, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value2, 8));

        // high(x) * high(y), assembled the same way into (value4, value6).
        Vector128<ulong> value4 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(vector, right, 0);
        Vector128<ulong> value5 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(vector, right, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(vector, right, 16));
        Vector128<ulong> value6 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(vector, right, 17);
        value4 = System.Runtime.Intrinsics.X86.Sse2.Xor(value4, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value5, 8));
        value6 = System.Runtime.Intrinsics.X86.Sse2.Xor(value6, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value5, 8));

        // (low(x) ^ high(x)) * (low(y) ^ high(y)): partial products kept separate.
        Vector128<ulong> right3 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 0);
        Vector128<ulong> value7 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 16));
        Vector128<ulong> right4 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 17);

        // Combine: the two middle 128-bit lanes receive the middle product plus both
        // outer products.
        Vector128<ulong> left3 = System.Runtime.Intrinsics.X86.Sse2.Xor(value3, value4);
        value3 = System.Runtime.Intrinsics.X86.Sse2.Xor(left3, value);
        value4 = System.Runtime.Intrinsics.X86.Sse2.Xor(left3, value6);
        value3 = System.Runtime.Intrinsics.X86.Sse2.Xor(value3, right3);
        value3 = System.Runtime.Intrinsics.X86.Sse2.Xor(value3, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value7, 8));
        value4 = System.Runtime.Intrinsics.X86.Sse2.Xor(value4, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value7, 8));
        value4 = System.Runtime.Intrinsics.X86.Sse2.Xor(value4, right4);

        // Store the four 128-bit lanes as the eight output words.
        Span<byte> span = MemoryMarshal.AsBytes(zz);
        MemoryMarshal.Write(span.Slice(0, 16), ref value);
        MemoryMarshal.Write(span.Slice(16, 16), ref value3);
        MemoryMarshal.Write(span.Slice(32, 16), ref value4);
        MemoryMarshal.Write(span.Slice(48, 16), ref value6);
    }
    else
    {
        // Operands re-sliced into 59-bit limbs; ImplExpand writes all four words of each.
        // A nested `stackalloc` is typed Span<byte>, so it cannot feed a
        // Span<ulong>(void*, int) constructor; allocate ulongs directly.
        Span<ulong> f = stackalloc ulong[4];
        Span<ulong> g = stackalloc ulong[4];
        ImplExpand(x, f);
        ImplExpand(y, g);

        // Lookup table for ImplMulwAcc. Entries 1..7 are rewritten on every call, but
        // entry 0 is only ever read, so it has to be zero.
        Span<ulong> u = stackalloc ulong[8];
        u.Clear();

        ImplMulwAcc(u, f[0], g[0], zz);
        ImplMulwAcc(u, f[1], g[1], zz.Slice(1));
        ImplMulwAcc(u, f[2], g[2], zz.Slice(2));
        ImplMulwAcc(u, f[3], g[3], zz.Slice(3));

        // Spread the accumulated limb products one word upward.
        for (int i = 5; i > 0; --i)
        {
            zz[i] ^= zz[i - 1];
        }

        ImplMulwAcc(u, f[0] ^ f[1], g[0] ^ g[1], zz.Slice(1));
        ImplMulwAcc(u, f[2] ^ f[3], g[2] ^ g[3], zz.Slice(3));

        // Spread the result two words upward.
        for (int i = 7; i > 1; --i)
        {
            zz[i] ^= zz[i - 2];
        }

        // Cross terms between the lower and upper limb pairs.
        ulong c0 = f[0] ^ f[2];
        ulong c1 = f[1] ^ f[3];
        ulong d0 = g[0] ^ g[2];
        ulong d1 = g[1] ^ g[3];
        ImplMulwAcc(u, c0 ^ c1, d0 ^ d1, zz.Slice(3));

        // Three-word accumulator; ImplMulwAcc only XORs into it, so zero it explicitly.
        Span<ulong> tt = stackalloc ulong[3];
        tt.Clear();
        ImplMulwAcc(u, c0, d0, tt);
        ImplMulwAcc(u, c1, d1, tt.Slice(1));

        ulong t0 = tt[0];
        ulong t1 = tt[1];
        ulong t2 = tt[2];
        zz[2] ^= t0;
        zz[3] ^= t0 ^ t1;
        zz[4] ^= t2 ^ t1;
        zz[5] ^= t2;

        // Repack the 59-bit limb layout into contiguous 64-bit words.
        ImplCompactExt(zz);
    }
}
/// <summary>
/// Carry-less multiplies <paramref name="x"/> by <paramref name="y"/> using a table of
/// the multiples 1..7 of <paramref name="y"/> and XORs the result into
/// <c>z[0]</c> (low 59 bits) and <c>z[1]</c> (remaining bits).
/// </summary>
/// <param name="u">
/// Eight-word table scratch. Entries 1..7 are rewritten here; entry 0 is read but never
/// written, so the caller must supply it as zero.
/// </param>
/// <param name="x">Multiplier, consumed six bits at a time.</param>
/// <param name="y">Multiplicand.</param>
/// <param name="z">Accumulator; words 0 and 1 are updated in place.</param>
private static void ImplMulwAcc(Span<ulong> u, ulong x, ulong y, Span<ulong> z)
{
    // u[k] = k (as a 3-bit polynomial) times y, for k = 1..7.
    u[1] = y;
    u[2] = u[1] << 1;
    u[3] = u[2] ^ y;
    u[4] = u[2] << 1;
    u[5] = u[4] ^ y;
    u[6] = u[3] << 1;
    u[7] = u[6] ^ y;

    // Lowest six bits of x: two 3-bit table lookups.
    uint window = (uint)x;
    ulong lo = u[(int)(window & 7)] ^ (u[(int)((window >> 3) & 7)] << 3);
    ulong hi = 0;

    // Remaining six-bit windows, from bit 54 down to bit 6.
    for (int shift = 54; shift > 0; shift -= 6)
    {
        window = (uint)(x >> shift);
        ulong partial = u[(int)(window & 7)] ^ (u[(int)((window >> 3) & 7)] << 3);
        lo ^= partial << shift;
        hi ^= partial >> -shift; // shift count is masked to 6 bits, i.e. 64 - shift
    }

    z[0] ^= lo & 0x07FFFFFFFFFFFFFFUL; // low 59 bits (2^59 - 1)
    z[1] ^= (lo >> 59) ^ (hi << 5);
}
/// <summary>
/// Expands the four words of <paramref name="x"/> into the eight words of
/// <paramref name="zz"/>, either with BMI2 bit-deposit instructions or, when those are
/// not enabled, through <c>Interleave.Expand64To128</c>.
/// </summary>
/// <param name="x">Input; words 0..3 are read.</param>
/// <param name="zz">Destination; words 0..7 are written.</param>
private static void ImplSquare(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
    {
        Interleave.Expand64To128(x.Slice(0, 4), zz.Slice(0, 8));
        return;
    }

    // Deposit mask selecting every even bit position (0x5555...).
    const ulong EvenBits = 0x5555555555555555UL;

    // Read all inputs before writing anything, and fill the output from the top down.
    ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];

    zz[7] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x3 >> 32, EvenBits);
    zz[6] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x3, EvenBits);
    zz[5] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x2 >> 32, EvenBits);
    zz[4] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x2, EvenBits);
    zz[3] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x1 >> 32, EvenBits);
    zz[2] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x1, EvenBits);
    zz[1] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x0 >> 32, EvenBits);
    zz[0] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x0, EvenBits);
}
}
}