// GcmUtilities
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Math.Raw;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Crypto.Modes.Gcm
{
internal static class GcmUtilities
{
/// <summary>
/// A 128-bit value stored as two 64-bit words. <c>AsFieldElement</c> fills
/// <c>n0</c> from bytes 0..7 and <c>n1</c> from bytes 8..15 of a 16-byte block;
/// <c>AsBytes</c> writes them back in the same order.
/// </summary>
internal struct FieldElement
{
    // n0 is declared first so the field order (n0, then n1) is unchanged.
    internal ulong n0, n1;
}
// 0xE1 placed in the top byte of a 32-bit word (decimal 3774873600).
private const uint E1 = 0xE1000000U;
// 0xE1 placed in the top byte of a 64-bit word (decimal 16212958658533785600).
private const ulong E1UL = 0xE100000000000000UL;
/// <summary>
/// Sets <paramref name="x"/> to the value whose only set bit is the most
/// significant bit of <c>n0</c> (all other bits zero).
/// </summary>
/// <param name="x">Receives the result.</param>
internal static void One(out FieldElement x)
{
    x.n0 = 1UL << 63;
    x.n1 = 0UL;
}
/// <summary>
/// Serializes two 64-bit words into <paramref name="z"/> via <c>Pack.UInt64_To_BE</c>:
/// <paramref name="x0"/> at offset 0 and <paramref name="x1"/> at offset 8.
/// </summary>
/// <param name="x0">Word written at offset 0.</param>
/// <param name="x1">Word written at offset 8.</param>
/// <param name="z">Destination buffer; 16 bytes are written starting at index 0.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void AsBytes(ulong x0, ulong x1, byte[] z)
{
// Presumably big-endian encoding, going by the helper's name.
Pack.UInt64_To_BE(x0, z, 0);
Pack.UInt64_To_BE(x1, z, 8);
}
/// <summary>
/// Serializes a <see cref="FieldElement"/> into <paramref name="z"/> by forwarding
/// its two words (<c>n0</c> first, then <c>n1</c>) to the word-based overload.
/// </summary>
/// <param name="x">Element to serialize; it is only read.</param>
/// <param name="z">Destination buffer.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void AsBytes(ref FieldElement x, byte[] z) => AsBytes(x.n0, x.n1, z);
/// <summary>
/// Loads a <see cref="FieldElement"/> from <paramref name="x"/> via <c>Pack.BE_To_UInt64</c>:
/// <c>n0</c> is read at offset 0 and <c>n1</c> at offset 8.
/// </summary>
/// <param name="x">Source buffer; 16 bytes are read starting at index 0.</param>
/// <param name="z">Receives the loaded element.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void AsFieldElement(byte[] x, out FieldElement z)
{
// Presumably big-endian decoding, going by the helper's name.
z.n0 = Pack.BE_To_UInt64(x, 0);
z.n1 = Pack.BE_To_UInt64(x, 8);
}
/// <summary>
/// Computes <paramref name="z"/> from <paramref name="x"/> by conditionally XOR-ing
/// <c>E1UL</c> into the high word and then shifting the 128-bit value
/// (<c>n0</c> high, <c>n1</c> low) left by one bit. Judging by the name, this is
/// division by the field generator P; the conditional XOR is what makes the
/// shift lossless.
/// </summary>
/// <param name="x">Input element; both words are read before anything is written.</param>
/// <param name="z">Receives the result.</param>
internal static void DivideP(ref FieldElement x, out FieldElement z)
{
    ulong x0 = x.n0, x1 = x.n1;

    // Arithmetic shift: m is all-ones when the top bit of x0 is set, else zero.
    ulong m = (ulong)((long)x0 >> 63);

    // E1UL has its own top bit set, so this also clears the bit that the
    // left shift below would otherwise discard.
    x0 ^= m & E1UL;

    z.n0 = (x0 << 1) | (x1 >> 63);
    // The lowest bit of the result is 1 exactly when the XOR above was applied.
    z.n1 = (x1 << 1) | (m & 1UL);
}
/// <summary>
/// Multiplies the 16-byte blocks <paramref name="x"/> and <paramref name="y"/> as
/// field elements and writes the product back into <paramref name="x"/>.
/// </summary>
/// <param name="x">Left operand on entry; overwritten with the product.</param>
/// <param name="y">Right operand; only read.</param>
internal static void Multiply(byte[] x, byte[] y)
{
    AsFieldElement(x, out FieldElement lhs);
    AsFieldElement(y, out FieldElement rhs);

    Multiply(ref lhs, ref rhs);

    AsBytes(ref lhs, x);
}
/// <summary>
/// Multiplies <paramref name="x"/> by <paramref name="y"/> and stores the reduced
/// 128-bit product in <paramref name="x"/>.
/// </summary>
/// <remarks>
/// When the PCLMULQDQ path is enabled, the work is delegated to the
/// <see cref="Vector128{T}"/>-based <c>Multiply</c> overload (<c>MultiplyExt</c>
/// followed by <c>Reduce3</c>) rather than repeating that code here. Otherwise a
/// portable three-multiplication (Karatsuba-style) scheme built on
/// <c>ImplMul64</c> is used.
/// </remarks>
/// <param name="x">Left operand on entry; receives the product.</param>
/// <param name="y">Right operand; only read.</param>
internal static void Multiply(ref FieldElement x, ref FieldElement y)
{
    if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
    {
        // Same lane layout as Load/Store: element 0 holds n1, element 1 holds n0.
        Vector128<ulong> X = Vector128.Create(x.n1, x.n0);
        Vector128<ulong> Y = Vector128.Create(y.n1, y.n0);

        Vector128<ulong> Z = Multiply(X, Y);

        x.n0 = Z.GetElement(1);
        x.n1 = Z.GetElement(0);
        return;
    }

    ulong x0 = x.n0, x1 = x.n1;
    ulong y0 = y.n0, y1 = y.n1;
    ulong x0r = Longs.Reverse(x0), x1r = Longs.Reverse(x1);
    ulong y0r = Longs.Reverse(y0), y1r = Longs.Reverse(y1);

    // ImplMul64 only yields the low 64 bits of a carry-less product. The other
    // half is recovered by multiplying the bit-reversed operands and reversing
    // the result back.
    ulong h0 = Longs.Reverse(ImplMul64(x0r, y0r));
    ulong h1 = ImplMul64(x0, y0) << 1;
    ulong h2 = Longs.Reverse(ImplMul64(x1r, y1r));
    ulong h3 = ImplMul64(x1, y1) << 1;
    ulong h4 = Longs.Reverse(ImplMul64(x0r ^ x1r, y0r ^ y1r));
    ulong h5 = ImplMul64(x0 ^ x1, y0 ^ y1) << 1;

    // Recombine the three partial products into a 256-bit value z0:z1:z2:z3.
    ulong z0 = h0;
    ulong z1 = h1 ^ h0 ^ h2 ^ h4;
    ulong z2 = h2 ^ h1 ^ h3 ^ h5;
    ulong z3 = h3;

    // Fold z3 into z1/z2. The (z3 << 63) term is omitted because bit 0 of z3
    // is always zero (h3 was shifted left by one).
    z1 ^= z3 ^ (z3 >> 1) ^ (z3 >> 2) ^ (z3 >> 7);
    z2 ^= (z3 << 62) ^ (z3 << 57);

    // Fold z2 into z0/z1.
    z0 ^= z2 ^ (z2 >> 1) ^ (z2 >> 2) ^ (z2 >> 7);
    z1 ^= (z2 << 63) ^ (z2 << 62) ^ (z2 << 57);

    x.n0 = z0;
    x.n1 = z1;
}
/// <summary>
/// Loads a 16-byte block into a vector whose element 0 is the <c>n1</c> word and
/// element 1 is the <c>n0</c> word of the decoded <see cref="FieldElement"/>.
/// </summary>
/// <param name="x">Source buffer, decoded by <c>AsFieldElement</c>.</param>
/// <returns>The vector <c>(n1, n0)</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ulong> Load(byte[] x)
{
    AsFieldElement(x, out FieldElement element);

    ulong lane0 = element.n1;
    ulong lane1 = element.n0;
    return Vector128.Create(lane0, lane1);
}
/// <summary>
/// Multiplies two field elements held in vectors: forms the unreduced product
/// with <c>MultiplyExt</c>, then reduces it with <c>Reduce3</c>.
/// </summary>
/// <param name="X">Left operand.</param>
/// <param name="Y">Right operand.</param>
/// <returns>The reduced product.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ulong> Multiply(Vector128<ulong> X, Vector128<ulong> Y)
{
    MultiplyExt(X, Y, out Vector128<ulong> low, out Vector128<ulong> middle, out Vector128<ulong> high);

    Vector128<ulong> product = Reduce3(low, middle, high);
    return product;
}
/// <summary>
/// Computes the three partial carry-less products of <paramref name="X"/> and
/// <paramref name="Y"/> without reducing them.
/// </summary>
/// <param name="X">Left operand.</param>
/// <param name="Y">Right operand.</param>
/// <param name="Z0">Product selected by control byte 0x00.</param>
/// <param name="Z1">XOR of the two cross products (control bytes 0x01 and 0x10).</param>
/// <param name="Z2">Product selected by control byte 0x11.</param>
/// <exception cref="PlatformNotSupportedException">The PCLMULQDQ path is not enabled.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void MultiplyExt(Vector128<ulong> X, Vector128<ulong> Y, out Vector128<ulong> Z0, out Vector128<ulong> Z1, out Vector128<ulong> Z2)
{
    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
    {
        throw new PlatformNotSupportedException("MultiplyExt");
    }

    Z0 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, Y, 0x00);

    Vector128<ulong> crossA = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, Y, 0x01);
    Vector128<ulong> crossB = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, Y, 0x10);
    Z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(crossA, crossB);

    Z2 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, Y, 0x11);
}
/// <summary>
/// Reduces a 256-bit value supplied as two 128-bit halves to a 128-bit result.
/// The four input words are folded from the lowest upwards using the shift
/// pattern (0, 1, 2, 7), with a one-bit left shift applied in between.
/// </summary>
/// <param name="Z0">Low half of the value to reduce.</param>
/// <param name="Z2">High half of the value to reduce.</param>
/// <returns>The reduced value; element 1 is the high word, element 0 the low word.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ulong> Reduce2(Vector128<ulong> Z0, Vector128<ulong> Z2)
{
    // t0 is the most significant input word, t3 the least significant.
    ulong t3 = Z0.GetElement(0);
    ulong t2 = Z0.GetElement(1);
    ulong t1 = Z2.GetElement(0);
    ulong t0 = Z2.GetElement(1);

    // Fold t3 into t1, with the bits shifted out of t1 landing in t2.
    t1 ^= t3 ^ (t3 >> 1) ^ (t3 >> 2) ^ (t3 >> 7);
    t2 ^= (t3 << 63) ^ (t3 << 62) ^ (t3 << 57);

    // Shift the remaining three words left by one bit.
    ulong z0 = (t0 << 1) | (t1 >> 63);
    ulong z1 = (t1 << 1) | (t2 >> 63);
    ulong z2 = t2 << 1;

    // Fold z2 into z0/z1. The z1 terms are written in terms of t2, i.e.
    // (z2 << 62) == (t2 << 63) and (z2 << 57) == (t2 << 58).
    z0 ^= z2 ^ (z2 >> 1) ^ (z2 >> 2) ^ (z2 >> 7);
    z1 ^= (t2 << 63) ^ (t2 << 58);

    return Vector128.Create(z1, z0);
}
/// <summary>
/// Reduces an unreduced product supplied as three 128-bit pieces (as produced
/// by <c>MultiplyExt</c>) to a 128-bit result. The middle piece overlaps the
/// upper word of <paramref name="Z0"/> and the lower word of <paramref name="Z2"/>.
/// </summary>
/// <param name="Z0">Low partial product.</param>
/// <param name="Z1">Middle (cross-term) partial product.</param>
/// <param name="Z2">High partial product.</param>
/// <returns>The reduced value; element 1 is the high word, element 0 the low word.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ulong> Reduce3(Vector128<ulong> Z0, Vector128<ulong> Z1, Vector128<ulong> Z2)
{
    // Assemble the 256-bit value t0:t1:t2:t3 (t0 most significant).
    ulong t3 = Z0.GetElement(0);
    ulong t2 = Z0.GetElement(1) ^ Z1.GetElement(0);
    ulong t1 = Z2.GetElement(0) ^ Z1.GetElement(1);
    ulong t0 = Z2.GetElement(1);

    // Fold t3 into t1, with the bits shifted out of t1 landing in t2.
    t1 ^= t3 ^ (t3 >> 1) ^ (t3 >> 2) ^ (t3 >> 7);
    t2 ^= (t3 << 63) ^ (t3 << 62) ^ (t3 << 57);

    // Shift the remaining three words left by one bit.
    ulong z0 = (t0 << 1) | (t1 >> 63);
    ulong z1 = (t1 << 1) | (t2 >> 63);
    ulong z2 = t2 << 1;

    // Fold z2 into z0/z1. The z1 terms are written in terms of t2, i.e.
    // (z2 << 62) == (t2 << 63) and (z2 << 57) == (t2 << 58).
    z0 ^= z2 ^ (z2 >> 1) ^ (z2 >> 2) ^ (z2 >> 7);
    z1 ^= (t2 << 63) ^ (t2 << 58);

    return Vector128.Create(z1, z0);
}
/// <summary>
/// Writes a vector to a 16-byte block: element 1 is serialized first (offset 0),
/// then element 0 (offset 8), mirroring the lane order used by <c>Load</c>.
/// </summary>
/// <param name="X">Vector to store.</param>
/// <param name="z">Destination buffer.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void Store(Vector128<ulong> X, byte[] z) => AsBytes(X.GetElement(1), X.GetElement(0), z);
/// <summary>
/// Squares a field element held in a vector: forms the unreduced square with
/// <c>SquareExt</c>, then reduces it with <c>Reduce2</c>.
/// </summary>
/// <param name="X">Element to square.</param>
/// <returns>The reduced square.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ulong> Square(Vector128<ulong> X)
{
    SquareExt(X, out Vector128<ulong> low, out Vector128<ulong> high);

    Vector128<ulong> squared = Reduce2(low, high);
    return squared;
}
/// <summary>
/// Computes the two partial carry-less products of <paramref name="X"/> with
/// itself (control bytes 0x00 and 0x11), without reducing them. No cross
/// products are computed here.
/// </summary>
/// <param name="X">Element to square.</param>
/// <param name="Z0">Product selected by control byte 0x00.</param>
/// <param name="Z2">Product selected by control byte 0x11.</param>
/// <exception cref="PlatformNotSupportedException">The PCLMULQDQ path is not enabled.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void SquareExt(Vector128<ulong> X, out Vector128<ulong> Z0, out Vector128<ulong> Z2)
{
    if (!Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
    {
        throw new PlatformNotSupportedException("SquareExt");
    }

    Z0 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, X, 0x00);
    Z2 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(X, X, 0x11);
}
/// <summary>
/// Shifts the 128-bit value (<c>n0</c> high, <c>n1</c> low) right by 7 bits in
/// place and folds the 7 bits shifted out of <c>n1</c> back into <c>n0</c>
/// using the shift pattern (0, 1, 2, 7). Judging by the name, this is
/// multiplication by P^7.
/// </summary>
/// <param name="x">Element updated in place.</param>
internal static void MultiplyP7(ref FieldElement x)
{
    ulong x0 = x.n0, x1 = x.n1;

    // The 7 low bits of x1, moved to the top of a word.
    ulong c = x1 << 57;

    x.n0 = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
    x.n1 = (x1 >> 7) | (x0 << 57);
}
/// <summary>
/// Shifts the 128-bit value (<c>n0</c> high, <c>n1</c> low) right by 8 bits in
/// place and folds the byte shifted out of <c>n1</c> back into <c>n0</c> using
/// the shift pattern (0, 1, 2, 7). Judging by the name, this is multiplication
/// by P^8.
/// </summary>
/// <param name="x">Element updated in place.</param>
internal static void MultiplyP8(ref FieldElement x)
{
    ulong x0 = x.n0, x1 = x.n1;

    // The low byte of x1, moved to the top of a word.
    ulong c = x1 << 56;

    x.n0 = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
    x.n1 = (x1 >> 8) | (x0 << 56);
}
/// <summary>
/// Same computation as the in-place <c>MultiplyP8</c>, but the result is written
/// to <paramref name="y"/> and <paramref name="x"/> is left unmodified.
/// </summary>
/// <param name="x">Input element; both words are read before anything is written.</param>
/// <param name="y">Receives the result.</param>
internal static void MultiplyP8(ref FieldElement x, out FieldElement y)
{
    ulong x0 = x.n0, x1 = x.n1;

    // The low byte of x1, moved to the top of a word.
    ulong c = x1 << 56;

    y.n0 = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
    y.n1 = (x1 >> 8) | (x0 << 56);
}
/// <summary>
/// Shifts the 128-bit value (<c>n0</c> high, <c>n1</c> low) right by 16 bits in
/// place and folds the 16 bits shifted out of <c>n1</c> back into <c>n0</c>
/// using the shift pattern (0, 1, 2, 7). Judging by the name, this is
/// multiplication by P^16.
/// </summary>
/// <param name="x">Element updated in place.</param>
internal static void MultiplyP16(ref FieldElement x)
{
    ulong x0 = x.n0, x1 = x.n1;

    // The low 16 bits of x1, moved to the top of a word.
    ulong c = x1 << 48;

    x.n0 = (x0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
    x.n1 = (x1 >> 16) | (x0 << 48);
}
/// <summary>
/// Squares <paramref name="x"/> in place. Each 64-bit word is expanded to 128
/// bits with <c>Interleave.Expand64To128Rev</c>, and the resulting four words
/// are reduced back to two.
/// </summary>
/// <param name="x">Element to square; receives the result.</param>
internal static void Square(ref FieldElement x)
{
    // Each expansion yields two words: one returned, one through the out argument.
    // NOTE(review): the reduction treats t0 (from n0's out word) as most
    // significant and t3 (n1's return value) as least - confirm against Interleave.
    ulong t1 = Interleave.Expand64To128Rev(x.n0, out ulong t0);
    ulong t3 = Interleave.Expand64To128Rev(x.n1, out ulong t2);

    // Fold t3 into t1/t2.
    ulong z1 = t1 ^ t3 ^ (t3 >> 1) ^ (t3 >> 2) ^ (t3 >> 7);
    ulong z2 = t2 ^ (t3 << 62) ^ (t3 << 57);

    // Fold z2 into the two result words. The low-word terms use t2 directly:
    // the bits z2 gained from t3 sit in the top 7 positions and would be
    // shifted out by << 62 and << 57 anyway.
    x.n0 = t0 ^ z2 ^ (z2 >> 1) ^ (z2 >> 2) ^ (z2 >> 7);
    x.n1 = z1 ^ (t2 << 62) ^ (t2 << 57);
}
/// <summary>
/// XORs the first 16 bytes of <paramref name="y"/> into the first 16 bytes of
/// <paramref name="x"/>, processing indices in ascending order, four per iteration.
/// </summary>
/// <param name="x">Buffer updated in place.</param>
/// <param name="y">Buffer XOR-ed into <paramref name="x"/>.</param>
internal static void Xor(byte[] x, byte[] y)
{
    for (int i = 0; i < 16; i += 4)
    {
        x[i] ^= y[i];
        x[i + 1] ^= y[i + 1];
        x[i + 2] ^= y[i + 2];
        x[i + 3] ^= y[i + 3];
    }
}
/// <summary>
/// XORs 16 bytes of <paramref name="y"/>, starting at <paramref name="yOff"/>,
/// into the first 16 bytes of <paramref name="x"/>, processing indices in
/// ascending order, four per iteration.
/// </summary>
/// <param name="x">Buffer updated in place.</param>
/// <param name="y">Buffer XOR-ed into <paramref name="x"/>.</param>
/// <param name="yOff">Index in <paramref name="y"/> of the first byte to use.</param>
internal static void Xor(byte[] x, byte[] y, int yOff)
{
    for (int i = 0; i < 16; i += 4)
    {
        x[i] ^= y[yOff + i];
        x[i + 1] ^= y[yOff + i + 1];
        x[i + 2] ^= y[yOff + i + 2];
        x[i + 3] ^= y[yOff + i + 3];
    }
}
/// <summary>
/// XORs <paramref name="yLen"/> bytes of <paramref name="y"/>, starting at
/// <paramref name="yOff"/>, into the start of <paramref name="x"/>. Bytes are
/// processed from the highest index down to index 0.
/// </summary>
/// <param name="x">Buffer updated in place.</param>
/// <param name="y">Buffer XOR-ed into <paramref name="x"/>.</param>
/// <param name="yOff">Index in <paramref name="y"/> of the first byte to use.</param>
/// <param name="yLen">Number of bytes to process; nothing happens when it is zero or negative.</param>
internal static void Xor(byte[] x, byte[] y, int yOff, int yLen)
{
    for (int i = yLen - 1; i >= 0; --i)
    {
        x[i] ^= y[yOff + i];
    }
}
/// <summary>
/// XORs <paramref name="len"/> bytes of <paramref name="y"/>, starting at
/// <paramref name="yOff"/>, into <paramref name="x"/> starting at
/// <paramref name="xOff"/>. Bytes are processed from the highest index down.
/// </summary>
/// <param name="x">Buffer updated in place.</param>
/// <param name="xOff">Index in <paramref name="x"/> of the first byte to update.</param>
/// <param name="y">Buffer XOR-ed into <paramref name="x"/>.</param>
/// <param name="yOff">Index in <paramref name="y"/> of the first byte to use.</param>
/// <param name="len">Number of bytes to process; nothing happens when it is zero or negative.</param>
internal static void Xor(byte[] x, int xOff, byte[] y, int yOff, int len)
{
    for (int i = len - 1; i >= 0; --i)
    {
        x[xOff + i] ^= y[yOff + i];
    }
}
/// <summary>
/// XORs <paramref name="y"/> into <paramref name="x"/>, word by word.
/// </summary>
/// <param name="x">Element updated in place.</param>
/// <param name="y">Element XOR-ed into <paramref name="x"/>; only read.</param>
internal static void Xor(ref FieldElement x, ref FieldElement y)
{
    x.n0 = x.n0 ^ y.n0;
    x.n1 = x.n1 ^ y.n1;
}
/// <summary>
/// Writes the word-by-word XOR of <paramref name="x"/> and <paramref name="y"/>
/// to <paramref name="z"/>; the operands themselves are not assigned.
/// </summary>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <param name="z">Receives the result.</param>
internal static void Xor(ref FieldElement x, ref FieldElement y, out FieldElement z)
{
    z.n0 = y.n0 ^ x.n0;
    z.n1 = y.n1 ^ x.n1;
}
/// <summary>
/// XORs the first 16 bytes of <paramref name="y"/> into the first 16 bytes of
/// <paramref name="x"/>, processing indices in ascending order, four per iteration.
/// </summary>
/// <param name="x">Span updated in place.</param>
/// <param name="y">Span XOR-ed into <paramref name="x"/>.</param>
internal static void Xor(Span<byte> x, ReadOnlySpan<byte> y)
{
    for (int i = 0; i < 16; i += 4)
    {
        x[i] ^= y[i];
        x[i + 1] ^= y[i + 1];
        x[i + 2] ^= y[i + 2];
        x[i + 3] ^= y[i + 3];
    }
}
/// <summary>
/// XORs the first <paramref name="len"/> bytes of <paramref name="y"/> into the
/// first <paramref name="len"/> bytes of <paramref name="x"/>, in ascending index order.
/// </summary>
/// <param name="x">Span updated in place.</param>
/// <param name="y">Span XOR-ed into <paramref name="x"/>.</param>
/// <param name="len">Number of bytes to process; nothing happens when it is zero or negative.</param>
internal static void Xor(Span<byte> x, ReadOnlySpan<byte> y, int len)
{
    int i = 0;
    while (i < len)
    {
        x[i] ^= y[i];
        ++i;
    }
}
/// <summary>
/// Returns the low 64 bits of the carry-less (XOR-based) product of
/// <paramref name="x"/> and <paramref name="y"/>, computed with ordinary integer
/// multiplications.
/// </summary>
/// <remarks>
/// Each operand is split into four words that keep only every fourth bit. The
/// three zero bits between kept bits absorb the carries of the integer
/// multiplications, so after masking only the XOR (parity) of the partial
/// products remains at each kept position.
/// </remarks>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>The low 64 bits of the carry-less product.</returns>
private static ulong ImplMul64(ulong x, ulong y)
{
    const ulong M0 = 0x1111111111111111UL;
    const ulong M1 = 0x2222222222222222UL;
    const ulong M2 = 0x4444444444444444UL;
    const ulong M3 = 0x8888888888888888UL;

    ulong x0 = x & M0, x1 = x & M1, x2 = x & M2, x3 = x & M3;
    ulong y0 = y & M0, y1 = y & M1, y2 = y & M2, y3 = y & M3;

    // zk collects the products whose bit-position classes sum to k modulo 4.
    ulong z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
    ulong z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
    ulong z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
    ulong z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);

    // Discard the carry bits that landed in the gaps and merge the four classes.
    return (z0 & M0) | (z1 & M1) | (z2 & M2) | (z3 & M3);
}
}
}