// SecT409Field
using Org.BouncyCastle.Math.Raw;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Math.EC.Custom.Sec
{
internal static class SecT409Field
{
// Mask selecting the low 25 bits of a word (2^25 - 1).
private const ulong M25 = 33554431;
// Mask selecting the low 59 bits of a word (2^59 - 1).
private const ulong M59 = 576460752303423487;
/// <summary>
/// Writes the word-wise XOR of <paramref name="x"/> and <paramref name="y"/> into
/// <paramref name="z"/> for the first 7 words.
/// </summary>
/// <param name="x">First operand; words 0..6 are read.</param>
/// <param name="y">Second operand; words 0..6 are read.</param>
/// <param name="z">Destination; words 0..6 are overwritten.</param>
public static void Add(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 7; ++i)
    {
        z[i] = x[i] ^ y[i];
    }
}
/// <summary>
/// XORs both <paramref name="x"/> and <paramref name="y"/> into <paramref name="z"/>
/// in place, for the first 7 words.
/// </summary>
/// <param name="x">First operand; words 0..6 are read.</param>
/// <param name="y">Second operand; words 0..6 are read.</param>
/// <param name="z">Accumulator; words 0..6 are updated.</param>
public static void AddBothTo(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    for (int i = 0; i < 7; ++i)
    {
        z[i] ^= x[i] ^ y[i];
    }
}
/// <summary>
/// Writes the word-wise XOR of two extended (13-word) values into <paramref name="zz"/>.
/// </summary>
/// <param name="xx">First operand; words 0..12 are read.</param>
/// <param name="yy">Second operand; words 0..12 are read.</param>
/// <param name="zz">Destination; words 0..12 are overwritten, anything beyond is untouched.</param>
public static void AddExt(ReadOnlySpan<ulong> xx, ReadOnlySpan<ulong> yy, Span<ulong> zz)
{
    const int ExtWords = 13;

    int word = 0;
    while (word < ExtWords)
    {
        zz[word] = xx[word] ^ yy[word];
        ++word;
    }
}
/// <summary>
/// Copies the 7 words of <paramref name="x"/> into <paramref name="z"/> with the lowest
/// bit of word 0 flipped (XOR with the constant 1).
/// </summary>
/// <param name="x">Source; words 0..6 are read.</param>
/// <param name="z">Destination; words 0..6 are overwritten.</param>
public static void AddOne(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    z[0] = x[0] ^ 1UL;

    // The remaining words are unaffected by adding one.
    for (int i = 1; i < 7; ++i)
    {
        z[i] = x[i];
    }
}
/// <summary>
/// XORs the 7 words of <paramref name="x"/> into <paramref name="z"/> in place.
/// </summary>
/// <param name="x">Operand; words 0..6 are read.</param>
/// <param name="z">Accumulator; words 0..6 are updated.</param>
public static void AddTo(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    for (int i = 0; i < 7; ++i)
    {
        z[i] ^= x[i];
    }
}
/// <summary>
/// Converts <paramref name="x"/> to a 64-bit word array by delegating to
/// <c>Nat.FromBigInteger64</c> with a bit length of 409.
/// </summary>
/// <param name="x">Value to convert.</param>
/// <returns>The array produced by <c>Nat.FromBigInteger64(409, x)</c>.</returns>
public static ulong[] FromBigInteger(BigInteger x) => Nat.FromBigInteger64(409, x);
/// <summary>
/// Starting from z = x, repeats 204 times: square z twice (reducing after each squaring)
/// and then XOR <paramref name="x"/> back in, i.e. z = z^4 + x per iteration.
/// </summary>
/// <param name="x">Field element; 7 words are read.</param>
/// <param name="z">Destination; receives the accumulated result (7 words).</param>
public static void HalfTrace(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // Scratch for the unreduced 13-word square. Allocated as a typed stack span so
    // the method needs neither pointers nor the 'unsafe' modifier.
    Span<ulong> tt = stackalloc ulong[13];

    Nat448.Copy64(x, z);

    // i takes the odd values 1, 3, ..., 407.
    for (int i = 1; i < 409; i += 2)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
        ImplSquare(z, tt);
        Reduce(tt, z);
        AddTo(x, z);
    }
}
/// <summary>
/// Computes the multiplicative inverse of <paramref name="x"/> into <paramref name="z"/>
/// by raising x to the power 2^409 - 2 with a fixed chain of squarings and multiplications.
/// </summary>
/// <param name="x">Field element to invert; 7 words are read.</param>
/// <param name="z">Destination for the inverse (7 words).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Nat448.IsZero64(x)</c> reports that <paramref name="x"/> is zero.
/// </exception>
public static void Invert(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    if (Nat448.IsZero64(x))
    {
        throw new InvalidOperationException();
    }

    // Typed stack spans replace the pointer-based Span construction, so the method
    // no longer needs the 'unsafe' modifier.
    Span<ulong> t0 = stackalloc ulong[7];
    Span<ulong> t1 = stackalloc ulong[7];
    Span<ulong> t2 = stackalloc ulong[7];

    // t0 = x^2
    Square(x, t0);

    // t0 = x^(2 * (2^3 - 1))
    SquareN(t0, 1, t1);
    Multiply(t0, t1, t0);
    SquareN(t1, 1, t1);
    Multiply(t0, t1, t0);

    // t0 = x^(2 * (2^6 - 1))
    SquareN(t0, 3, t1);
    Multiply(t0, t1, t0);

    // t0 = x^(2 * (2^12 - 1))
    SquareN(t0, 6, t1);
    Multiply(t0, t1, t0);

    // t2 = x^(2 * (2^24 - 1)); kept aside for the final multiplication.
    SquareN(t0, 12, t1);
    Multiply(t0, t1, t2);

    // t0 = x^(2 * 2^24 * (2^48 - 1))
    SquareN(t2, 24, t0);
    SquareN(t0, 24, t1);
    Multiply(t0, t1, t0);

    // t0 = x^(2 * 2^24 * (2^96 - 1))
    SquareN(t0, 48, t1);
    Multiply(t0, t1, t0);

    // t0 = x^(2 * 2^24 * (2^192 - 1))
    SquareN(t0, 96, t1);
    Multiply(t0, t1, t0);

    // t0 = x^(2 * 2^24 * (2^384 - 1))
    SquareN(t0, 192, t1);
    Multiply(t0, t1, t0);

    // z = t0 * t2 = x^(2 * (2^408 - 1)) = x^(2^409 - 2)
    Multiply(t0, t2, z);
}
/// <summary>
/// Multiplies <paramref name="x"/> by <paramref name="y"/> and writes the reduced
/// 7-word product into <paramref name="z"/>.
/// </summary>
/// <param name="x">First factor (7 words).</param>
/// <param name="y">Second factor (7 words).</param>
/// <param name="z">Destination for the reduced product (7 words).</param>
public static void Multiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
{
    // 14-word scratch for the unreduced product, allocated as a typed stack span so
    // no pointers or 'unsafe' are needed.
    // NOTE(review): ImplMultiply XOR-accumulates into this buffer, so it must start
    // zeroed; that holds only while stack locals are zero-initialized (confirm that
    // SkipLocalsInit is not applied to this assembly).
    Span<ulong> tt = stackalloc ulong[14];

    ImplMultiply(x, y, tt);
    Reduce(tt, z);
}
/// <summary>
/// Multiplies <paramref name="x"/> by <paramref name="y"/> without reduction and XORs
/// the first 13 words of the product into <paramref name="zz"/>.
/// </summary>
/// <param name="x">First factor (7 words).</param>
/// <param name="y">Second factor (7 words).</param>
/// <param name="zz">Extended accumulator; words 0..12 are updated.</param>
public static void MultiplyAddToExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // 14-word scratch for the unreduced product, allocated as a typed stack span so
    // no pointers or 'unsafe' are needed.
    // NOTE(review): ImplMultiply XOR-accumulates into this buffer, so it must start
    // zeroed; that holds only while stack locals are zero-initialized (confirm that
    // SkipLocalsInit is not applied to this assembly).
    Span<ulong> tt = stackalloc ulong[14];

    ImplMultiply(x, y, tt);

    // AddExt combines 13 words only; the 14th scratch word is working space.
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced product of <paramref name="x"/> and <paramref name="y"/> into
/// <paramref name="zz"/>.
/// </summary>
/// <param name="x">First factor (7 words).</param>
/// <param name="y">Second factor (7 words).</param>
/// <param name="zz">Destination; must hold at least 14 words, the first 14 are zeroed first.</param>
public static void MultiplyExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // ImplMultiply XORs partial products into zz, so start from an all-zero buffer.
    Span<ulong> product = zz.Slice(0, 14);
    product.Clear();

    ImplMultiply(x, y, zz);
}
/// <summary>
/// Folds a 13-word extended value down to 7 words (409 bits). The shift amounts
/// correspond to reduction by x^409 + x^87 + 1: a bit at position 409 + k is folded
/// onto positions k and k + 87.
/// </summary>
/// <param name="xx">Extended input; words 0..12 are read.</param>
/// <param name="z">Destination; words 0..6 are overwritten, word 6 keeps only its low 25 bits.</param>
public static void Reduce(ReadOnlySpan<ulong> xx, Span<ulong> z)
{
    const ulong Low25 = 0x1FFFFFFUL;

    ulong x00 = xx[0], x01 = xx[1], x02 = xx[2], x03 = xx[3];
    ulong x04 = xx[4], x05 = xx[5], x06 = xx[6], x07 = xx[7];

    // Fold the high words from the top down. Word k lands in words k-7, k-6 and k-5.
    ulong u = xx[12];
    x05 ^= u << 39;
    x06 ^= (u >> 25) ^ (u << 62);
    x07 ^= u >> 2;

    u = xx[11];
    x04 ^= u << 39;
    x05 ^= (u >> 25) ^ (u << 62);
    x06 ^= u >> 2;

    u = xx[10];
    x03 ^= u << 39;
    x04 ^= (u >> 25) ^ (u << 62);
    x05 ^= u >> 2;

    u = xx[9];
    x02 ^= u << 39;
    x03 ^= (u >> 25) ^ (u << 62);
    x04 ^= u >> 2;

    u = xx[8];
    x01 ^= u << 39;
    x02 ^= (u >> 25) ^ (u << 62);
    x03 ^= u >> 2;

    // Word 7 has by now absorbed the spill from word 12.
    u = x07;
    x00 ^= u << 39;
    x01 ^= (u >> 25) ^ (u << 62);
    x02 ^= u >> 2;

    // Bits 25 and up of word 6 are bits 409 and up overall: fold them once more.
    ulong t = x06 >> 25;

    z[0] = x00 ^ t;
    z[1] = x01 ^ (t << 23);
    z[2] = x02;
    z[3] = x03;
    z[4] = x04;
    z[5] = x05;
    z[6] = x06 & Low25;
}
/// <summary>
/// Performs the final fold on a 7-word value stored in <paramref name="z"/> starting at
/// <paramref name="zOff"/>: bits 25 and up of word 6 are XORed into word 0 and, shifted
/// left by 23, into word 1; word 6 is then truncated to its low 25 bits.
/// </summary>
/// <param name="z">Array holding the value; modified in place.</param>
/// <param name="zOff">Index of the value's first word within <paramref name="z"/>.</param>
public static void Reduce39(ulong[] z, int zOff)
{
    const ulong Low25 = 0x1FFFFFFUL;

    ulong top = z[zOff + 6];
    ulong overflow = top >> 25;

    z[zOff] ^= overflow;
    z[zOff + 1] ^= overflow << 23;
    z[zOff + 6] = top & Low25;
}
/// <summary>
/// Computes a 7-word result from <paramref name="x"/> by unshuffling its words with
/// <c>Interleave.Unshuffle</c> and recombining the halves with fixed shifts.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>out</c> values are presumed to be the even-indexed bits and the
/// return values the odd-indexed bits of each input; confirm against
/// <c>Interleave.Unshuffle</c>.
/// </remarks>
/// <param name="x">Field element; 7 words are read.</param>
/// <param name="z">Destination; words 0..6 are overwritten.</param>
public static void Sqrt(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    ulong c0 = Interleave.Unshuffle(x[0], x[1], out ulong e0);
    ulong c1 = Interleave.Unshuffle(x[2], x[3], out ulong e1);
    ulong c2 = Interleave.Unshuffle(x[4], x[5], out ulong e2);
    ulong c3 = Interleave.Unshuffle(x[6], out ulong e3);

    // Each c word enters at a 44-bit word offset (<< 44, >> 20) and again three
    // words higher at a 13-bit offset (<< 13, >> 51).
    z[0] = e0 ^ (c0 << 44);
    z[1] = e1 ^ (c1 << 44) ^ (c0 >> 20);
    z[2] = e2 ^ (c2 << 44) ^ (c1 >> 20);
    z[3] = e3 ^ (c3 << 44) ^ (c2 >> 20) ^ (c0 << 13);
    z[4] = (c3 >> 20) ^ (c1 << 13) ^ (c0 >> 51);
    z[5] = (c2 << 13) ^ (c1 >> 51);
    z[6] = (c3 << 13) ^ (c2 >> 51);
}
/// <summary>
/// Squares <paramref name="x"/> and writes the reduced 7-word result into
/// <paramref name="z"/>.
/// </summary>
/// <param name="x">Field element to square (7 words).</param>
/// <param name="z">Destination for the reduced square (7 words).</param>
public static void Square(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    // 13-word scratch for the unreduced square, allocated as a typed stack span so
    // no pointers or 'unsafe' are needed.
    Span<ulong> tt = stackalloc ulong[13];

    ImplSquare(x, tt);
    Reduce(tt, z);
}
/// <summary>
/// Squares <paramref name="x"/> without reduction and XORs the 13-word result into
/// <paramref name="zz"/>.
/// </summary>
/// <param name="x">Field element to square (7 words).</param>
/// <param name="zz">Extended accumulator; words 0..12 are updated.</param>
public static void SquareAddToExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // 13-word scratch for the unreduced square, allocated as a typed stack span so
    // no pointers or 'unsafe' are needed.
    Span<ulong> tt = stackalloc ulong[13];

    ImplSquare(x, tt);
    AddExt(zz, tt, zz);
}
/// <summary>
/// Writes the unreduced square of <paramref name="x"/> into <paramref name="zz"/> by
/// delegating to <c>ImplSquare</c>.
/// </summary>
/// <param name="x">Field element to square (7 words).</param>
/// <param name="zz">Destination for the extended result (13 words are written).</param>
public static void SquareExt(ReadOnlySpan<ulong> x, Span<ulong> zz) => ImplSquare(x, zz);
/// <summary>
/// Squares <paramref name="x"/> repeatedly, reducing after every squaring, and writes
/// the result into <paramref name="z"/>.
/// </summary>
/// <param name="x">Field element to square (7 words).</param>
/// <param name="n">
/// Number of squarings. One squaring is always performed, so any value of 1 or less
/// yields a single squaring.
/// </param>
/// <param name="z">Destination (7 words); also used as the running value between squarings.</param>
public static void SquareN(ReadOnlySpan<ulong> x, int n, Span<ulong> z)
{
    // 13-word scratch for the unreduced square, allocated as a typed stack span so
    // no pointers or 'unsafe' are needed.
    Span<ulong> tt = stackalloc ulong[13];

    ImplSquare(x, tt);
    Reduce(tt, z);

    // The first squaring is done above; perform the remaining n - 1.
    while (--n > 0)
    {
        ImplSquare(z, tt);
        Reduce(tt, z);
    }
}
/// <summary>
/// Returns the lowest bit of word 0 of <paramref name="x"/>.
/// </summary>
/// <param name="x">Field element; only word 0 is read.</param>
/// <returns>0 or 1.</returns>
public static uint Trace(ReadOnlySpan<ulong> x)
{
    int lowWord = (int)x[0];
    return (uint)(lowWord & 1);
}
/// <summary>
/// Packs 14 limbs spaced 59 bits apart into contiguous 64-bit words, in place:
/// limb i is placed at bit offset 59 * i of the packed value.
/// </summary>
/// <param name="zz">
/// Buffer of at least 14 words. Words 0..12 receive the packed result; word 13 is read
/// but not written.
/// </param>
private static void ImplCompactExt(Span<ulong> zz)
{
    // Read every limb before writing, since the packing is done in place.
    ulong z00 = zz[0], z01 = zz[1], z02 = zz[2], z03 = zz[3], z04 = zz[4];
    ulong z05 = zz[5], z06 = zz[6], z07 = zz[7], z08 = zz[8], z09 = zz[9];
    ulong z10 = zz[10], z11 = zz[11], z12 = zz[12], z13 = zz[13];

    // Each output word takes the rest of one limb and the start of the next; the
    // split point moves by 5 bits per word (64 - 59).
    zz[0] = z00 ^ (z01 << 59);
    zz[1] = (z01 >> 5) ^ (z02 << 54);
    zz[2] = (z02 >> 10) ^ (z03 << 49);
    zz[3] = (z03 >> 15) ^ (z04 << 44);
    zz[4] = (z04 >> 20) ^ (z05 << 39);
    zz[5] = (z05 >> 25) ^ (z06 << 34);
    zz[6] = (z06 >> 30) ^ (z07 << 29);
    zz[7] = (z07 >> 35) ^ (z08 << 24);
    zz[8] = (z08 >> 40) ^ (z09 << 19);
    zz[9] = (z09 >> 45) ^ (z10 << 14);
    zz[10] = (z10 >> 50) ^ (z11 << 9);
    // Word 11 takes pieces of three limbs.
    zz[11] = (z11 >> 55) ^ (z12 << 4) ^ (z13 << 63);
    zz[12] = z13 >> 1;
}
/// <summary>
/// Splits a 7-word value into 7 limbs of 59 bits each: limb i holds the bits starting
/// at bit offset 59 * i. Limbs 0..5 are masked to 59 bits; limb 6 is left unmasked.
/// </summary>
/// <param name="x">Source; words 0..6 are read.</param>
/// <param name="z">Destination; words 0..6 are overwritten with the limbs.</param>
private static void ImplExpand(ReadOnlySpan<ulong> x, Span<ulong> z)
{
    const ulong Low59 = 0x07FFFFFFFFFFFFFFUL;

    ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
    ulong x4 = x[4], x5 = x[5], x6 = x[6];

    // The limb boundary drifts by 5 bits per word (64 - 59).
    z[0] = x0 & Low59;
    z[1] = ((x0 >> 59) ^ (x1 << 5)) & Low59;
    z[2] = ((x1 >> 54) ^ (x2 << 10)) & Low59;
    z[3] = ((x2 >> 49) ^ (x3 << 15)) & Low59;
    z[4] = ((x3 >> 44) ^ (x4 << 20)) & Low59;
    z[5] = ((x4 >> 39) ^ (x5 << 25)) & Low59;
    z[6] = (x5 >> 34) ^ (x6 << 30);
}
/// <summary>
/// Computes the unreduced carry-less product of <paramref name="x"/> and
/// <paramref name="y"/> into <paramref name="zz"/>.
/// </summary>
/// <remarks>
/// Both operands are split into seven 59-bit limbs via <c>ImplExpand</c>. The products
/// a[i]*b[i] are accumulated at limb offset 2*i, combined, and then the cross products
/// (a[i]^a[j])*(b[i]^b[j]) are accumulated at limb offset i+j, before
/// <c>ImplCompactExt</c> packs the limbs back into 64-bit words.
/// NOTE(review): this looks like a Karatsuba-style arrangement; confirm against the
/// upstream implementation.
/// </remarks>
/// <param name="x">First factor (7 words).</param>
/// <param name="y">Second factor (7 words).</param>
/// <param name="zz">
/// Destination of at least 14 words. Partial products are XOR-accumulated into it, so
/// the caller must pass it zeroed.
/// </param>
private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
{
    // Typed stack spans replace the pointer-based Span construction, so the method
    // no longer needs the 'unsafe' modifier.
    Span<ulong> a = stackalloc ulong[7];
    Span<ulong> b = stackalloc ulong[7];
    ImplExpand(x, a);
    ImplExpand(y, b);

    // Lookup-table scratch for ImplMulwAcc's non-intrinsic path.
    // NOTE(review): that path reads u[0] without ever writing it, so u[0] must be 0;
    // this holds only while stack locals are zero-initialized (confirm that
    // SkipLocalsInit is not applied to this assembly).
    Span<ulong> u = stackalloc ulong[8];

    // Diagonal products a[i]*b[i], each occupying limbs 2*i and 2*i + 1.
    for (int i = 0; i < 7; i++)
    {
        int offset = i << 1;
        ImplMulwAcc(u, a[i], b[i], zz.Slice(offset, zz.Length - offset));
    }

    // Running XORs of the even (v0) and odd (v1) limbs. Each zz[k] is read before
    // it is overwritten, so the statement order below must be kept.
    ulong v0 = zz[0];
    ulong v1 = zz[1];
    v0 ^= zz[2];
    zz[1] = v0 ^ v1;
    v1 ^= zz[3];
    v0 ^= zz[4];
    zz[2] = v0 ^ v1;
    v1 ^= zz[5];
    v0 ^= zz[6];
    zz[3] = v0 ^ v1;
    v1 ^= zz[7];
    v0 ^= zz[8];
    zz[4] = v0 ^ v1;
    v1 ^= zz[9];
    v0 ^= zz[10];
    zz[5] = v0 ^ v1;
    v1 ^= zz[11];
    v0 ^= zz[12];
    zz[6] = v0 ^ v1;
    v1 ^= zz[13];

    // w is the XOR of all 14 diagonal limbs.
    ulong w = v0 ^ v1;
    zz[7] = zz[0] ^ w;
    zz[8] = zz[1] ^ w;
    zz[9] = zz[2] ^ w;
    zz[10] = zz[3] ^ w;
    zz[11] = zz[4] ^ w;
    zz[12] = zz[5] ^ w;
    zz[13] = zz[6] ^ w;

    // Cross products (a[i]^a[j])*(b[i]^b[j]) accumulated at limb offset i + j.
    ImplMulwAcc(u, a[0] ^ a[1], b[0] ^ b[1], zz.Slice(1, zz.Length - 1));

    ImplMulwAcc(u, a[0] ^ a[2], b[0] ^ b[2], zz.Slice(2, zz.Length - 2));

    ImplMulwAcc(u, a[0] ^ a[3], b[0] ^ b[3], zz.Slice(3, zz.Length - 3));
    ImplMulwAcc(u, a[1] ^ a[2], b[1] ^ b[2], zz.Slice(3, zz.Length - 3));

    ImplMulwAcc(u, a[0] ^ a[4], b[0] ^ b[4], zz.Slice(4, zz.Length - 4));
    ImplMulwAcc(u, a[1] ^ a[3], b[1] ^ b[3], zz.Slice(4, zz.Length - 4));

    ImplMulwAcc(u, a[0] ^ a[5], b[0] ^ b[5], zz.Slice(5, zz.Length - 5));
    ImplMulwAcc(u, a[1] ^ a[4], b[1] ^ b[4], zz.Slice(5, zz.Length - 5));
    ImplMulwAcc(u, a[2] ^ a[3], b[2] ^ b[3], zz.Slice(5, zz.Length - 5));

    ImplMulwAcc(u, a[0] ^ a[6], b[0] ^ b[6], zz.Slice(6, zz.Length - 6));
    ImplMulwAcc(u, a[1] ^ a[5], b[1] ^ b[5], zz.Slice(6, zz.Length - 6));
    ImplMulwAcc(u, a[2] ^ a[4], b[2] ^ b[4], zz.Slice(6, zz.Length - 6));

    ImplMulwAcc(u, a[1] ^ a[6], b[1] ^ b[6], zz.Slice(7, zz.Length - 7));
    ImplMulwAcc(u, a[2] ^ a[5], b[2] ^ b[5], zz.Slice(7, zz.Length - 7));
    ImplMulwAcc(u, a[3] ^ a[4], b[3] ^ b[4], zz.Slice(7, zz.Length - 7));

    ImplMulwAcc(u, a[2] ^ a[6], b[2] ^ b[6], zz.Slice(8, zz.Length - 8));
    ImplMulwAcc(u, a[3] ^ a[5], b[3] ^ b[5], zz.Slice(8, zz.Length - 8));

    ImplMulwAcc(u, a[3] ^ a[6], b[3] ^ b[6], zz.Slice(9, zz.Length - 9));
    ImplMulwAcc(u, a[4] ^ a[5], b[4] ^ b[5], zz.Slice(9, zz.Length - 9));

    ImplMulwAcc(u, a[4] ^ a[6], b[4] ^ b[6], zz.Slice(10, zz.Length - 10));

    ImplMulwAcc(u, a[5] ^ a[6], b[5] ^ b[6], zz.Slice(11, zz.Length - 11));

    // Pack the 59-bit limbs back into contiguous 64-bit words.
    ImplCompactExt(zz);
}
/// <summary>
/// Carry-less multiplies <paramref name="x"/> by <paramref name="y"/> and XORs the
/// product into <paramref name="z"/> as two limbs: the low 59 bits into z[0] and the
/// remaining bits into z[1].
/// </summary>
/// <param name="u">
/// 8-word scratch table used only by the non-intrinsic path. Entries 1..7 are
/// overwritten; entry 0 is read but never written here, so the caller must supply it as 0.
/// </param>
/// <param name="x">First factor.</param>
/// <param name="y">Second factor.</param>
/// <param name="z">Accumulator; z[0] and z[1] are updated.</param>
private static void ImplMulwAcc(Span<ulong> u, ulong x, ulong y, Span<ulong> z)
{
    const ulong Low59 = 0x07FFFFFFFFFFFFFFUL;

    if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
    {
        // Hardware path: one 64x64 carry-less multiply gives the 128-bit product.
        Vector128<ulong> product = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(
            Vector128.CreateScalar(x), Vector128.CreateScalar(y), 0);
        ulong lo = product.GetElement(0);
        ulong hi = product.GetElement(1);

        z[0] ^= lo & Low59;
        z[1] ^= (lo >> 59) ^ (hi << 5);
        return;
    }

    // Table path: u[d] is the carry-less product of y with the 3-bit value d.
    u[1] = y;
    u[2] = u[1] << 1;
    u[3] = u[2] ^ y;
    u[4] = u[2] << 1;
    u[5] = u[4] ^ y;
    u[6] = u[3] << 1;
    u[7] = u[6] ^ y;

    // Consume x six bits at a time (two 3-bit table lookups), starting with bits 0..5.
    uint j = (uint)x;
    ulong h = 0;
    ulong l = u[(int)(j & 7)] ^ (u[(int)((j >> 3) & 7)] << 3);

    for (int k = 54; k > 0; k -= 6)
    {
        j = (uint)(x >> k);
        ulong g = u[(int)(j & 7)] ^ (u[(int)((j >> 3) & 7)] << 3);
        l ^= g << k;
        // Shift counts are taken modulo 64, so ">> -k" is ">> (64 - k)".
        h ^= g >> -k;
    }

    z[0] ^= l & Low59;
    z[1] ^= (l >> 59) ^ (h << 5);
}
/// <summary>
/// Writes the unreduced square of <paramref name="x"/> into <paramref name="zz"/>.
/// Each of the first six input words is expanded into two output words, and the low
/// 32 bits of word 6 are expanded into output word 12.
/// </summary>
/// <param name="x">Source; words 0..6 are read.</param>
/// <param name="zz">Destination; words 0..12 are overwritten.</param>
private static void ImplSquare(ReadOnlySpan<ulong> x, Span<ulong> zz)
{
    // Mask with every even-numbered bit set: PDEP with it moves source bit i to bit 2*i.
    const ulong EvenBits = 0x5555555555555555UL;

    zz[12] = Interleave.Expand32to64((uint)x[6]);

    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
    {
        Interleave.Expand64To128(x.Slice(0, 6), zz.Slice(0, 12));
        return;
    }

    // Highest word first; the upper half of each source word goes to the odd output
    // index and the lower half to the even one.
    for (int i = 5; i >= 0; --i)
    {
        ulong word = x[i];
        zz[(i << 1) + 1] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(word >> 32, EvenBits);
        zz[i << 1] = System.Runtime.Intrinsics.X86.Bmi2.X64.ParallelBitDeposit(word, EvenBits);
    }
}
}
}