// SecT571Field
using Org.BouncyCastle.Math.Raw;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Math.EC.Custom.Sec
{
internal static class SecT571Field
{
// Mask selecting the low 59 bits of a 64-bit word (2^59 - 1).
// The reduction routines below use the same numeric value inline.
private const ulong M59 = 576460752303423487;
// Nine-word constant used by Sqrt as the multiplier of the half produced
// by the return value of Interleave.Unshuffle.
// NOTE(review): presumably the field element sqrt(z) in little-endian 64-bit words - confirm against upstream.
private static readonly ulong[] ROOT_Z = new ulong[9] {
3161836309350906777,
10804290191530228771,
14625517132619890193,
7312758566309945096,
17890083061325672324,
8945041530681231562,
13695892802195391589,
6847946401097695794,
541669439031730457
};
/// <summary>
/// Writes the 9-word XOR of <paramref name="x"/> and <paramref name="y"/> into <paramref name="z"/>
/// by delegating to <c>Nat.Xor64</c>.
/// </summary>
public static void Add(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
    => Nat.Xor64(9, x, y, z);
/// <summary>
/// Array/offset form of the 9-word XOR: delegates to <c>Nat.Xor64</c> with the given
/// arrays and starting offsets.
/// </summary>
private static void Add(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
    => Nat.Xor64(9, x, xOff, y, yOff, z, zOff);
/// <summary>
/// XORs both <paramref name="x"/> and <paramref name="y"/> into <paramref name="z"/>
/// over the first 9 words: <c>z[i] ^= x[i] ^ y[i]</c>.
/// </summary>
public static void AddBothTo(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    int word = 0;
    while (word < 9)
    {
        ulong combined = x[word] ^ y[word];
        z[word] ^= combined;
        ++word;
    }
}
/// <summary>
/// Array/offset form: for each of 9 words, XORs <c>x[xOff + i] ^ y[yOff + i]</c>
/// into <c>z[zOff + i]</c>.
/// </summary>
private static void AddBothTo(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
{
    int word = 0;
    while (word < 9)
    {
        ulong combined = x[xOff + word] ^ y[yOff + word];
        z[zOff + word] ^= combined;
        ++word;
    }
}
/// <summary>
/// Writes the 18-word XOR of <paramref name="xx"/> and <paramref name="yy"/> into
/// <paramref name="zz"/> by delegating to <c>Nat.Xor64</c>.
/// </summary>
public static void AddExt(ReadOnlySpan<ulong> xx, ReadOnlySpan<ulong> yy, Span<ulong> zz)
    => Nat.Xor64(18, xx, yy, zz);
/// <summary>
/// Copies the first 9 words of <paramref name="x"/> into <paramref name="z"/>
/// with the lowest bit of word 0 flipped.
/// </summary>
public static void AddOne(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // Word 0 carries the flipped bit; the remaining eight words are copied unchanged.
    z[0] = x[0] ^ 1UL;
    int word = 1;
    while (word < 9)
    {
        z[word] = x[word];
        ++word;
    }
}
/// <summary>
/// XORs 9 words of <paramref name="x"/> into <paramref name="z"/> by delegating to
/// <c>Nat.XorTo64</c>.
/// </summary>
public static void AddTo(ReadOnlySpan<ulong> x, Span<ulong> z)
    => Nat.XorTo64(9, x, z);
/// <summary>
/// Converts <paramref name="x"/> to a word array via <c>Nat.FromBigInteger64</c>
/// with a bit length of 571.
/// </summary>
/// <returns>The array produced by <c>Nat.FromBigInteger64</c>.</returns>
public static ulong[] FromBigInteger(BigInteger x)
    => Nat.FromBigInteger64(571, x);
/// <summary>
/// Starting from a copy of <paramref name="x"/> in <paramref name="z"/>, repeats
/// "square twice, then add <paramref name="x"/>" for i = 1, 3, ..., 569.
/// </summary>
/// <param name="x">9-word input element.</param>
/// <param name="z">9-word output; overwritten.</param>
public static void HalfTrace(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // 18-word scratch for the unreduced square; ImplSquare overwrites it on every use.
    Span<ulong> tt = stackalloc ulong[18];

    Nat576.Copy64(x, z);
    for (int i = 1; i < 571; i += 2)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
        ImplSquare(z, tt);
        Reduce(tt, z);
        AddTo(x, z);
    }
}
/// <summary>
/// Computes the inverse of <paramref name="x"/> into <paramref name="z"/> using a fixed
/// sequence of squarings and multiplications.
/// </summary>
/// <param name="x">9-word input element; must be non-zero.</param>
/// <param name="z">9-word output; overwritten.</param>
/// <exception cref="InvalidOperationException"><paramref name="x"/> is zero.</exception>
public static void Invert(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    if (Nat576.IsZero64(x))
    {
        throw new InvalidOperationException();
    }

    // Three 9-word temporaries; each is fully written by Square/SquareN/Multiply before it is read.
    Span<ulong> t0 = stackalloc ulong[9];
    Span<ulong> t1 = stackalloc ulong[9];
    Span<ulong> t2 = stackalloc ulong[9];

    // NOTE(review): the chain below presumably raises x to 2^571 - 2 - confirm against upstream.
    Square(x, t2);

    Square(t2, t0);
    Square(t0, t1);
    Multiply(t0, t1, t0);
    SquareN(t0, 2, t1);
    Multiply(t0, t1, t0);
    Multiply(t0, t2, t0);

    SquareN(t0, 5, t1);
    Multiply(t0, t1, t0);
    SquareN(t1, 5, t1);
    Multiply(t0, t1, t0);

    SquareN(t0, 15, t1);
    Multiply(t0, t1, t2);

    SquareN(t2, 30, t0);
    SquareN(t0, 30, t1);
    Multiply(t0, t1, t0);

    SquareN(t0, 60, t1);
    Multiply(t0, t1, t0);
    SquareN(t1, 60, t1);
    Multiply(t0, t1, t0);

    SquareN(t0, 180, t1);
    Multiply(t0, t1, t0);
    SquareN(t1, 180, t1);
    Multiply(t0, t1, t0);

    Multiply(t0, t2, z);
}
/// <summary>
/// Multiplies <paramref name="x"/> by <paramref name="y"/> and stores the reduced
/// 9-word product in <paramref name="z"/>.
/// </summary>
public static void Multiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    Span<ulong> tt = stackalloc ulong[18];
    // ImplMultiply XOR-accumulates into its output, and stackalloc memory is not
    // guaranteed to be zeroed, so clear the scratch explicitly.
    tt.Clear();

    ImplMultiply(x, y, tt);
    Reduce(tt, z);
}
/// <summary>
/// Computes the unreduced 18-word product of <paramref name="x"/> and <paramref name="y"/>
/// and XORs it into <paramref name="zz"/>.
/// </summary>
public static void MultiplyAddToExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    Span<ulong> tt = stackalloc ulong[18];
    // ImplMultiply XOR-accumulates into its output, and stackalloc memory is not
    // guaranteed to be zeroed, so clear the scratch explicitly.
    tt.Clear();

    ImplMultiply(x, y, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Zeroes the first 18 words of <paramref name="zz"/> and then accumulates the unreduced
/// product of <paramref name="x"/> and <paramref name="y"/> into it.
/// </summary>
public static void MultiplyExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // ImplMultiply XORs into its destination, so start from zero.
    Span<ulong> product = zz.Slice(0, 18);
    product.Clear();

    ImplMultiply(x, y, zz);
}
/// <summary>
/// Multiplies <paramref name="x"/> by the multiplicand held in <paramref name="precomp"/>
/// and stores the reduced 9-word product in <paramref name="z"/>.
/// </summary>
public static void MultiplyPrecomp(ReadOnlySpan<ulong> x, ulong[] precomp, Span<ulong> z)
{
    Span<ulong> tt = stackalloc ulong[18];
    // ImplMultiplyPrecomp forwards to ImplMultiply, which XOR-accumulates into its output;
    // stackalloc memory is not guaranteed to be zeroed, so clear the scratch explicitly.
    tt.Clear();

    ImplMultiplyPrecomp(x, precomp, tt);
    Reduce(tt, z);
}
/// <summary>
/// Computes the unreduced 18-word product of <paramref name="x"/> and the multiplicand in
/// <paramref name="precomp"/>, and XORs it into <paramref name="zz"/>.
/// </summary>
public static void MultiplyPrecompAddToExt(ReadOnlySpan<ulong> x, ulong[] precomp, Span<ulong> zz)
{
    Span<ulong> tt = stackalloc ulong[18];
    // ImplMultiplyPrecomp forwards to ImplMultiply, which XOR-accumulates into its output;
    // stackalloc memory is not guaranteed to be zeroed, so clear the scratch explicitly.
    tt.Clear();

    ImplMultiplyPrecomp(x, precomp, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Returns a new array from <c>Nat576.Create64</c> filled with a copy of <paramref name="x"/>
/// (via <c>Nat576.Copy64</c>), in the form accepted by the <c>*Precomp</c> multiplication methods.
/// </summary>
public static ulong[] PrecompMultiplicand(ReadOnlySpan<ulong> x)
{
    ulong[] multiplicand = Nat576.Create64();

    Nat576.Copy64(x, multiplicand);

    return multiplicand;
}
/// <summary>
/// Folds the 18-word value <paramref name="xx"/> into 9 words in <paramref name="z"/>.
/// Each word above index 8 is XORed back 9 words lower at shifts 5, 7, 10 and 15,
/// and finally the bits of <c>z[8]</c> above bit 58 are folded into <c>z[0]</c>.
/// </summary>
/// <param name="xx">18-word input.</param>
/// <param name="z">9-word output; every word is written.</param>
public static void Reduce(ReadOnlySpan<ulong> xx, Span<ulong> z)
{
    const ulong Low59Mask = (1UL << 59) - 1UL;

    // Word 9 is special: it first absorbs the spill from word 17, and is itself folded last.
    ulong word9 = xx[9];
    ulong hi = xx[17];
    word9 ^= SpillIntoNext(hi);
    ulong pending = xx[8] ^ SpillIntoSame(hi);

    for (int i = 16; i >= 10; --i)
    {
        hi = xx[i];
        z[i - 8] = pending ^ SpillIntoNext(hi);
        pending = xx[i - 9] ^ SpillIntoSame(hi);
    }

    hi = word9;
    z[1] = pending ^ SpillIntoNext(hi);
    pending = xx[0] ^ SpillIntoSame(hi);

    // Final fold of everything above bit 58 of the top word.
    ulong top = z[8];
    ulong excess = top >> 59;
    z[0] = pending ^ excess ^ (excess << 2) ^ (excess << 5) ^ (excess << 10);
    z[8] = top & Low59Mask;

    // Part of a folded word that lands in the destination word one above.
    ulong SpillIntoNext(ulong u)
    {
        return (u >> 59) ^ (u >> 57) ^ (u >> 54) ^ (u >> 49);
    }

    // Part of a folded word that lands in the destination word itself.
    ulong SpillIntoSame(ulong u)
    {
        return (u << 5) ^ (u << 7) ^ (u << 10) ^ (u << 15);
    }
}
/// <summary>
/// Folds the bits of <c>z[zOff + 8]</c> above bit 58 into <c>z[zOff]</c> (at shifts 0, 2, 5 and 10)
/// and masks <c>z[zOff + 8]</c> down to its low 59 bits.
/// </summary>
public static void Reduce5(ulong[] z, int zOff)
{
    const ulong Low59Mask = (1UL << 59) - 1UL;

    int topIndex = zOff + 8;
    ulong top = z[topIndex];
    ulong excess = top >> 59;

    ulong fold = excess ^ (excess << 2) ^ (excess << 5) ^ (excess << 10);
    z[zOff] ^= fold;
    z[topIndex] = top & Low59Mask;
}
/// <summary>
/// Splits <paramref name="x"/> into two halves with <c>Interleave.Unshuffle</c>, multiplies the
/// half taken from the return values by <c>ROOT_Z</c>, and adds the half taken from the
/// <c>out</c> values, storing the result in <paramref name="z"/>.
/// </summary>
/// <param name="x">9-word input element.</param>
/// <param name="z">9-word output; overwritten.</param>
public static void Sqrt(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    Span<ulong> direct = stackalloc ulong[9];
    Span<ulong> scaled = stackalloc ulong[9];
    // Only words 0..4 are assigned below but Multiply/Add read all 9 words, and stackalloc
    // memory is not guaranteed to be zeroed, so clear both buffers explicitly.
    direct.Clear();
    scaled.Clear();

    scaled[0] = Interleave.Unshuffle(x[0], x[1], out direct[0]);
    scaled[1] = Interleave.Unshuffle(x[2], x[3], out direct[1]);
    scaled[2] = Interleave.Unshuffle(x[4], x[5], out direct[2]);
    scaled[3] = Interleave.Unshuffle(x[6], x[7], out direct[3]);
    scaled[4] = Interleave.Unshuffle(x[8], out direct[4]);

    Multiply(scaled, ROOT_Z, z);
    Add(z, direct, z);
}
/// <summary>
/// Squares <paramref name="x"/> and stores the reduced 9-word result in <paramref name="z"/>.
/// </summary>
public static void Square(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // 18-word scratch for the unreduced square.
    Span<ulong> tt = stackalloc ulong[18];

    ImplSquare(x, tt);
    Reduce(tt, z);
}
/// <summary>
/// Computes the unreduced 18-word square of <paramref name="x"/> and XORs it into
/// <paramref name="zz"/>.
/// </summary>
public static void SquareAddToExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // 18-word scratch for the unreduced square.
    Span<ulong> tt = stackalloc ulong[18];

    ImplSquare(x, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced square of <paramref name="x"/> into <paramref name="zz"/>
/// by delegating to <c>ImplSquare</c>.
/// </summary>
public static void SquareExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
    => ImplSquare(x, zz);
/// <summary>
/// Squares <paramref name="x"/> repeatedly, <paramref name="n"/> times in total, storing the
/// reduced result in <paramref name="z"/>.
/// </summary>
/// <param name="x">9-word input element.</param>
/// <param name="n">Number of squarings. One squaring is always performed, so values below 1 behave like 1.</param>
/// <param name="z">9-word output; overwritten.</param>
public static void SquareN(ReadOnlySpan<ulong> x, int n, Span<ulong> z)
{
    // 18-word scratch for the unreduced square; ImplSquare overwrites it on every use.
    Span<ulong> tt = stackalloc ulong[18];

    ImplSquare(x, tt);
    Reduce(tt, z);

    while (--n > 0)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
    }
}
/// <summary>
/// Returns the XOR of three bits of <paramref name="x"/>: bit 0 of word 0 and bits 49 and 57
/// of word 8 (overall bit positions 0, 561 and 569).
/// </summary>
/// <returns>0 or 1.</returns>
public static uint Trace(ReadOnlySpan<ulong> x)
{
    ulong low = x[0];
    ulong top = x[8];
    ulong folded = low ^ (top >> 49) ^ (top >> 57);
    return (uint)(folded & 1UL);
}
/// <summary>
/// XOR-accumulates the unreduced product of the 9-word operands <paramref name="x"/> and
/// <paramref name="y"/> into <paramref name="zz"/>. The destination is not cleared here, so
/// callers must pass a zeroed buffer to obtain just the product.
/// </summary>
/// <remarks>
/// Three phases: the nine word-by-word products <c>x[i]*y[i]</c>, a recombination of those
/// products across the 18 output words, then one product of <c>(x[i]^x[j])</c> and
/// <c>(y[i]^y[j])</c> for every pair i &lt; j, added at word offset i + j.
/// </remarks>
private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // Lookup-table scratch shared by every ImplMulwAcc call; must start zeroed (entry 0 is never written).
    ulong[] table = new ulong[16];
    Span<ulong> scratch = table;

    // Phase 1: x[i]*y[i] accumulated at words 2i and 2i+1.
    for (int i = 0; i < 9; i++)
    {
        ImplMulwAcc(scratch, x[i], y[i], zz.Slice(i << 1));
    }

    // Phase 2: running XORs of the even-indexed (evens) and odd-indexed (odds) words,
    // written back into the low half. Each zz[k] is overwritten only after its original
    // value has already been consumed.
    ulong evens = zz[0];
    ulong odds = zz[1];
    for (int k = 1; k <= 8; k++)
    {
        evens ^= zz[2 * k];
        zz[k] = evens ^ odds;
        odds ^= zz[2 * k + 1];
    }

    // The high half is the low half XORed with the total of all 18 original words.
    ulong total = evens ^ odds;
    for (int k = 0; k < 9; k++)
    {
        zz[9 + k] = zz[k] ^ total;
    }

    // Phase 3: cross terms for every pair i < j, grouped by offset = i + j and visited
    // in ascending offset, then ascending i.
    for (int offset = 1; offset <= 15; offset++)
    {
        int firstI = offset > 8 ? offset - 8 : 0;
        for (int i = firstI; 2 * i < offset; i++)
        {
            int j = offset - i;
            ImplMulwAcc(scratch, x[i] ^ x[j], y[i] ^ y[j], zz.Slice(offset));
        }
    }
}
/// <summary>
/// Forwards to <c>ImplMultiply</c>, using <paramref name="precomp"/> directly as the second operand.
/// </summary>
private static void ImplMultiplyPrecomp(ReadOnlySpan<ulong> x, ulong[] precomp, Span<ulong> zz)
    => ImplMultiply(x, precomp, zz);
/// <summary>
/// XORs the 128-bit carry-less product of <paramref name="x"/> and <paramref name="y"/> into
/// <c>z[0]</c> (low word) and <c>z[1]</c> (high word).
/// </summary>
/// <param name="u">16-word scratch table used only by the software path; entries 1..15 are
/// rewritten on every call and entry 0 is read but never written here.</param>
/// <param name="x">First 64-bit operand.</param>
/// <param name="y">Second 64-bit operand.</param>
/// <param name="z">Destination; at least 2 words.</param>
private static void ImplMulwAcc(Span<ulong> u, ulong x, ulong y, Span<ulong> z)
{
    if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
    {
        Vector128<ulong> product = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(
            Vector128.CreateScalar(x), Vector128.CreateScalar(y), 0);
        z[0] ^= product.GetElement(0);
        z[1] ^= product.GetElement(1);
        return;
    }

    // Software path. Build the table: u[2k] = u[k] << 1 and u[2k + 1] = u[2k] ^ y.
    u[1] = y;
    for (int even = 2; even < 16; even += 2)
    {
        u[even] = u[even >> 1] << 1;
        u[even + 1] = u[even] ^ y;
    }

    // Consume x one byte at a time (two 4-bit table lookups per byte).
    uint bits = (uint)x;
    ulong high = 0UL;
    ulong low = u[(int)(bits & 15)] ^ (u[(int)((bits >> 4) & 15)] << 4);
    for (int shift = 56; shift > 0; shift -= 8)
    {
        bits = (uint)(x >> shift);
        ulong part = u[(int)(bits & 15)] ^ (u[(int)((bits >> 4) & 15)] << 4);
        low ^= part << shift;
        // The shift count is masked to 6 bits, so ">> -shift" is ">> (64 - shift)".
        high ^= part >> -shift;
    }

    // The table entries were truncated to 64 bits; add back the contribution of the
    // top 7 bits of y that were shifted out.
    for (int p = 0; p < 7; p++)
    {
        x = (x & 0xFEFEFEFEFEFEFEFEUL) >> 1;
        // Arithmetic shift turns bit (63 - p) of y into an all-ones or all-zeros mask.
        ulong mask = (ulong)((long)(y << p) >> 63);
        high ^= x & mask;
    }

    z[0] ^= low;
    z[1] ^= high;
}
/// <summary>
/// Writes the 18-word bit-spread form of the 9-word <paramref name="x"/> into
/// <paramref name="zz"/>: each 32-bit half of every input word is deposited into the
/// even bit positions of one output word.
/// </summary>
/// <remarks>
/// Uses the BMI2 parallel-bit-deposit instruction when enabled, otherwise
/// <c>Interleave.Expand64To128</c>.
/// </remarks>
private static void ImplSquare(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
    {
        Interleave.Expand64To128(x.Slice(0, 9), zz.Slice(0, 18));
        return;
    }

    // Mask with every even bit set (0101...01).
    const ulong EvenBits = 0x5555555555555555UL;

    // Highest index first, matching the original store order.
    for (int word = 8; word >= 0; --word)
    {
        zz[2 * word + 1] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x[word] >> 32, EvenBits);
        zz[2 * word] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(x[word], EvenBits);
    }
}
}
}