<PackageReference Include="BouncyCastle.Cryptography" Version="2.5.1" />

BikeRing

sealed class BikeRing
using Org.BouncyCastle.Crypto.Utilities; using Org.BouncyCastle.Math.Raw; using Org.BouncyCastle.Runtime.Intrinsics.X86; using Org.BouncyCastle.Utilities; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; namespace Org.BouncyCastle.Pqc.Crypto.Bike { internal sealed class BikeRing { private const int PermutationCutoff = 64; private readonly int m_bits; private readonly int m_size; private readonly int m_sizeExt; private readonly Dictionary<int, int> m_halfPowers = new Dictionary<int, int>(); internal int Size => m_size; internal int SizeExt => m_sizeExt; internal BikeRing(int r) { if ((r & 4294901761) != 1) throw new ArgumentException(); m_bits = r; m_size = r + 63 >> 6; m_sizeExt = m_size * 2; uint r2 = Mod.Inverse32((uint)(-r)); foreach (int item in EnumerateSquarePowersInv(r)) { if (item >= 64 && !m_halfPowers.ContainsKey(item)) m_halfPowers[item] = GenerateHalfPower((uint)r, r2, item); } } internal void Add(ulong[] x, ulong[] y, ulong[] z) { Nat.Xor64(Size, x, y, z); } internal void AddTo(ulong[] x, ulong[] z) { Nat.XorTo64(Size, x, z); } internal void Copy(ulong[] x, ulong[] z) { for (int i = 0; i < Size; i++) { z[i] = x[i]; } } internal ulong[] Create() { return new ulong[Size]; } internal ulong[] CreateExt() { return new ulong[SizeExt]; } internal void DecodeBytes(byte[] bs, ulong[] z) { int len = (m_bits & 63) + 7 >> 3; Pack.LE_To_UInt64(bs, 0, z, 0, Size - 1); z[Size - 1] = Pack.LE_To_UInt64_Low(bs, Size - 1 << 3, len); } internal byte[] EncodeBitsTransposed(ulong[] x) { byte[] array = new byte[m_bits]; array[0] = (byte)(x[0] & 1); for (int i = 1; i < m_bits; i++) { array[m_bits - i] = (byte)((x[i >> 6] >> i) & 1); } return array; } internal void EncodeBytes(ulong[] x, byte[] bs) { int len = (m_bits & 63) + 7 >> 3; Pack.UInt64_To_LE(x, 0, Size - 1, bs, 0); Pack.UInt64_To_LE_Low(x[Size - 1], bs, Size - 1 << 3, len); } internal void Inv(ulong[] a, ulong[] z) { ulong[] array = Create(); ulong[] array2 = Create(); ulong[] array3 = Create(); Copy(a, array); Copy(a, array3); int num = m_bits - 2; int num2 = 32 - Integers.NumberOfLeadingZeros(num); for (int i = 1; i < num2; i++) { SquareN(array, 1 << i - 1, array2); Multiply(array, array2, array); if ((num & (1 << i)) != 0) { int n = num & ((1 << i) - 1); SquareN(array, n, array2); Multiply(array3, array2, array3); } } Square(array3, z); } internal void Multiply(ulong[] x, ulong[] y, ulong[] z) { Multiply(x, 0, y, 0, z); } internal void Multiply(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z) { ulong[] array = CreateExt(); ImplMultiplyAcc(x, xOff, y, yOff, array); Reduce(array, z); } internal void Reduce(ulong[] tt, ulong[] z) { int num = m_bits & 63; int num2 = 64 - num; ulong num3 = ulong.MaxValue >> num2; Nat.ShiftUpBits64(Size, tt, Size, num2, tt[Size - 1], z, 0); AddTo(tt, z); z[Size - 1] &= num3; } internal void Square(ulong[] x, ulong[] z) { ulong[] array = CreateExt(); ImplSquare(x, array); Reduce(array, z); } internal void SquareN(ulong[] x, int n, ulong[] z) { if (n >= 64) ImplPermute(x, n, z); else { ulong[] array = CreateExt(); ImplSquare(x, array); Reduce(array, z); while (--n > 0) { ImplSquare(z, array); Reduce(array, z); } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ImplModAdd(int m, int x, int y) { int num = x + y - m; return num + ((num >> 31) & m); } private void ImplMultiplyAcc(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] zz) { ulong num9 = x[xOff + Size - 1]; ulong num10 = y[yOff + Size - 1]; ulong num11 = zz[SizeExt - 1]; if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled) { int i = 0; for (int num = Size - 4; i <= num; i += 4) { Vector128<ulong> left = Vector128.Create(x[xOff + i], x[xOff + i + 1]); Vector128<ulong> left2 = Vector128.Create(x[xOff + i + 2], x[xOff + i + 3]); for (int j = 0; j <= num; j += 4) { Vector128<ulong> right = Vector128.Create(y[yOff + j], y[yOff + j + 1]); Vector128<ulong> right2 = Vector128.Create(y[yOff + j + 2], y[yOff + j + 3]); Vector128<ulong> left3 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right, 0); Vector128<ulong> value = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right, 16)); Vector128<ulong> left4 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right, 17); Vector128<ulong> right3 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right2, 0); Vector128<ulong> left5 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right2, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right2, 16)); Vector128<ulong> right4 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left, right2, 17); Vector128<ulong> right5 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right, 0); Vector128<ulong> right6 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right, 16)); Vector128<ulong> right7 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right, 17); Vector128<ulong> left6 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 0); Vector128<ulong> value2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 16)); Vector128<ulong> left7 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left2, right2, 17); left4 = System.Runtime.Intrinsics.X86.Sse2.Xor(left4, right3); left4 = System.Runtime.Intrinsics.X86.Sse2.Xor(left4, right5); Vector128<ulong> value3 = System.Runtime.Intrinsics.X86.Sse2.Xor(left5, right6); left6 = System.Runtime.Intrinsics.X86.Sse2.Xor(left6, right4); left6 = System.Runtime.Intrinsics.X86.Sse2.Xor(left6, right7); left3 = System.Runtime.Intrinsics.X86.Sse2.Xor(left3, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value, 8)); left4 = System.Runtime.Intrinsics.X86.Sse2.Xor(left4, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value, 8)); left4 = System.Runtime.Intrinsics.X86.Sse2.Xor(left4, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value3, 8)); left6 = System.Runtime.Intrinsics.X86.Sse2.Xor(left6, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value3, 8)); left6 = System.Runtime.Intrinsics.X86.Sse2.Xor(left6, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(value2, 8)); left7 = System.Runtime.Intrinsics.X86.Sse2.Xor(left7, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(value2, 8)); zz[i + j] ^= left3.GetElement(0); zz[i + j + 1] ^= left3.GetElement(1); zz[i + j + 2] ^= left4.GetElement(0); zz[i + j + 3] ^= left4.GetElement(1); zz[i + j + 4] ^= left6.GetElement(0); zz[i + j + 5] ^= left6.GetElement(1); zz[i + j + 6] ^= left7.GetElement(0); zz[i + j + 7] ^= left7.GetElement(1); } } int num2 = Size - 2; if (i <= num2) { Vector128<ulong> left8 = Vector128.Create(x[xOff + i], x[xOff + i + 1]); Vector128<ulong> right8 = Vector128.Create(y[yOff + i], y[yOff + i + 1]); for (int k = 0; k < i; k += 2) { Vector128<ulong> left9 = Vector128.Create(x[xOff + k], x[xOff + k + 1]); Vector128<ulong> right9 = Vector128.Create(y[yOff + k], y[yOff + k + 1]); Vector128<ulong> left10 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right9, 0); Vector128<ulong> left11 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right9, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right9, 16)); Vector128<ulong> left12 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right9, 17); Vector128<ulong> right10 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left9, right8, 0); Vector128<ulong> right11 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left9, right8, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left9, right8, 16)); Vector128<ulong> right12 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left9, right8, 17); Vector128<ulong> vector = System.Runtime.Intrinsics.X86.Sse2.Xor(left10, right10); Vector128<ulong> vector2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left11, right11); Vector128<ulong> vector3 = System.Runtime.Intrinsics.X86.Sse2.Xor(left12, right12); zz[i + k] ^= vector.GetElement(0); zz[i + k + 1] ^= (vector.GetElement(1) ^ vector2.GetElement(0)); zz[i + k + 2] ^= (vector3.GetElement(0) ^ vector2.GetElement(1)); zz[i + k + 3] ^= vector3.GetElement(1); } Vector128<ulong> vector4 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right8, 0); Vector128<ulong> vector5 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right8, 1), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right8, 16)); Vector128<ulong> vector6 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left8, right8, 17); zz[i + i] ^= vector4.GetElement(0); zz[i + i + 1] ^= (vector4.GetElement(1) ^ vector5.GetElement(0)); zz[i + i + 2] ^= (vector6.GetElement(0) ^ vector5.GetElement(1)); zz[i + i + 3] ^= vector6.GetElement(1); i += 2; } if (i < Size) { Vector128<ulong> left13 = Vector128.CreateScalar(x[xOff + i]); Vector128<ulong> vector7 = Vector128.CreateScalar(y[yOff + i]); for (int l = 0; l < i; l++) { Vector128<ulong> right13 = Vector128.CreateScalar(x[xOff + l]); Vector128<ulong> right14 = Vector128.CreateScalar(y[yOff + l]); Vector128<ulong> vector8 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left13, right14, 0), System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(vector7, right13, 0)); zz[i + l] ^= vector8.GetElement(0); zz[i + l + 1] ^= vector8.GetElement(1); } Vector128<ulong> vector9 = System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(left13, vector7, 0); zz[i + i] ^= vector9.GetElement(0); zz[i + i + 1] ^= vector9.GetElement(1); } } else { ulong[] u = new ulong[16]; for (int m = 0; m < Size; m++) { ImplMulwAcc(u, x[xOff + m], y[yOff + m], zz, m << 1); } ulong num3 = zz[0]; ulong num4 = zz[1]; for (int n = 1; n < Size; n++) { num3 ^= zz[n << 1]; zz[n] = (num3 ^ num4); num4 ^= zz[(n << 1) + 1]; } ulong y2 = num3 ^ num4; Nat.Xor64(Size, zz, 0, y2, zz, Size); int num5 = Size - 1; for (int num6 = 1; num6 < num5 * 2; num6++) { int num7 = System.Math.Min(num5, num6); int num8 = num6 - num7; while (num8 < num7) { ImplMulwAcc(u, x[xOff + num8] ^ x[xOff + num7], y[yOff + num8] ^ y[yOff + num7], zz, num6); num8++; num7--; } } } } private void ImplPermute(ulong[] x, int n, ulong[] z) { int bits = m_bits; int num = m_halfPowers[n]; int num2 = ImplModAdd(bits, num, num); int num3 = ImplModAdd(bits, num2, num2); int num4 = ImplModAdd(bits, num3, num3); int num5 = bits - num4; int num6 = ImplModAdd(bits, num5, num); int num7 = ImplModAdd(bits, num5, num2); int num8 = ImplModAdd(bits, num6, num2); int num9 = ImplModAdd(bits, num5, num3); int num10 = ImplModAdd(bits, num6, num3); int num11 = ImplModAdd(bits, num7, num3); int num12 = ImplModAdd(bits, num8, num3); for (int i = 0; i < Size; i++) { ulong num13 = 0; for (int j = 0; j < 64; j += 8) { num5 = ImplModAdd(bits, num5, num4); num6 = ImplModAdd(bits, num6, num4); num7 = ImplModAdd(bits, num7, num4); num8 = ImplModAdd(bits, num8, num4); num9 = ImplModAdd(bits, num9, num4); num10 = ImplModAdd(bits, num10, num4); num11 = ImplModAdd(bits, num11, num4); num12 = ImplModAdd(bits, num12, num4); num13 |= ((x[num5 >> 6] >> num5) & 1) << j; num13 |= ((x[num6 >> 6] >> num6) & 1) << j + 1; num13 |= ((x[num7 >> 6] >> num7) & 1) << j + 2; num13 |= ((x[num8 >> 6] >> num8) & 1) << j + 3; num13 |= ((x[num9 >> 6] >> num9) & 1) << j + 4; num13 |= ((x[num10 >> 6] >> num10) & 1) << j + 5; num13 |= ((x[num11 >> 6] >> num11) & 1) << j + 6; num13 |= ((x[num12 >> 6] >> num12) & 1) << j + 7; } z[i] = num13; } z[Size - 1] &= ulong.MaxValue >> -bits; } private static IEnumerable<int> EnumerateSquarePowersInv(int r) { int rSub2 = r - 2; int bits = 32 - Integers.NumberOfLeadingZeros(rSub2); int num2; for (int i = 1; i < bits; i = num2) { yield return 1 << i - 1; if ((rSub2 & (1 << i)) != 0) { int num = rSub2 & ((1 << i) - 1); yield return num; } num2 = i + 1; } } private static int GenerateHalfPower(uint r, uint r32, int n) { uint num = 1; int num2; for (num2 = n; num2 >= 32; num2 -= 32) { num = (uint)((ulong)((long)(r32 * num) * (long)r + num) >> 32); } if (num2 > 0) { uint num3 = uint.MaxValue >> -num2; num = (uint)((ulong)((long)((r32 * num) & num3) * (long)r + num) >> num2); } return (int)num; } private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff) { u[1] = y; for (int i = 2; i < 16; i += 2) { u[i] = u[i >> 1] << 1; u[i + 1] = (u[i] ^ y); } uint num = (uint)x; ulong num2 = 0; ulong num3 = u[num & 15] ^ (u[(num >> 4) & 15] << 4); int num4 = 56; do { num = (uint)(x >> num4); ulong num5 = u[num & 15] ^ (u[(num >> 4) & 15] << 4); num3 ^= num5 << num4; num2 ^= num5 >> -num4; } while ((num4 -= 8) > 0); for (int j = 0; j < 7; j++) { x = (ulong)((long)x & -72340172838076674) >> 1; num2 = (ulong)((long)num2 ^ ((long)x & ((long)(y << j) >> 63))); } z[zOff] ^= num3; z[zOff + 1] ^= num2; } private void ImplSquare(ulong[] x, ulong[] zz) { Interleave.Expand64To128(x, 0, Size, zz, 0); } } }