<PackageReference Include="BouncyCastle.Cryptography" Version="2.5.0" />

SparkleEngine

public sealed class SparkleEngine : IAeadCipher
Sparkle v1.2, based on the current round 3 submission (https://sparkle-lwc.github.io/).
using Org.BouncyCastle.Crypto.Modes;
using Org.BouncyCastle.Crypto.Parameters;
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
// Both imported X86 namespaces declare a type called Sse2; these aliases keep call sites short and unambiguous.
using BcSse2 = Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2;
using CoreSse2 = System.Runtime.Intrinsics.X86.Sse2;

namespace Org.BouncyCastle.Crypto.Engines
{
    /// <summary>
    /// AEAD cipher engine for the four SCHWAEMM parameter sets, built on the Sparkle permutation
    /// implemented by <see cref="SparkleOpt8"/>, <see cref="SparkleOpt12"/> and <see cref="SparkleOpt16"/>.
    /// </summary>
    /// <remarks>
    /// The engine is a small state machine: <see cref="Init"/> loads key and nonce, associated data is
    /// absorbed first, then message data, and <see cref="DoFinal(byte[], int)"/> produces or verifies the tag.
    /// After an encryption has been finalised the engine refuses further use until <see cref="Init"/> is
    /// called again; after a decryption it re-arms itself with the same key and nonce.
    /// </remarks>
    public sealed class SparkleEngine : IAeadCipher
    {
        /// <summary>Supported parameter sets (named nonce-bits / key-bits by their algorithm names).</summary>
        public enum SparkleParameters
        {
            SCHWAEMM128_128,
            SCHWAEMM256_128,
            SCHWAEMM192_192,
            SCHWAEMM256_256
        }

        /// <summary>Lifecycle of the engine; the Enc*/Dec* halves mirror each other.</summary>
        private enum State
        {
            Uninitialized,
            EncInit,
            EncAad,
            EncData,
            EncFinal,
            DecInit,
            DecAad,
            DecData,
            DecFinal
        }

        /// <summary>Round constants used by the Sparkle permutation.</summary>
        private static readonly uint[] RCON =
        {
            0xB7E15162U, 0xBF715880U, 0x38B4DA56U, 0x324E7738U,
            0xBB1185EBU, 0x4F7C7B57U, 0xCFBFA1C8U, 0xC2B3293DU
        };

        private readonly string algorithmName;

        // Permutation state: RATE_WORDS rate words followed by the capacity words.
        private readonly uint[] state;
        // Key and nonce, kept as little-endian words so Reset can re-arm the state.
        private readonly uint[] k;
        private readonly uint[] npub;

        private byte[] tag;
        // True once at least one full message block has been processed.
        private bool encrypted;
        private State m_state;
        private byte[] initialAssociatedText;

        // Decryption buffers one rate block plus a tag, because the trailing tag must never be decrypted.
        private readonly int m_bufferSizeDecrypt;
        private readonly byte[] m_buf;
        private int m_bufPos;

        private readonly int SCHWAEMM_KEY_LEN;
        private readonly int SCHWAEMM_NONCE_LEN;
        private readonly int SPARKLE_STEPS_SLIM;
        private readonly int SPARKLE_STEPS_BIG;
        private readonly int KEY_BYTES;
        private readonly int KEY_WORDS;
        private readonly int TAG_WORDS;
        private readonly int TAG_BYTES;
        private readonly int STATE_WORDS;
        private readonly int RATE_WORDS;
        private readonly int RATE_BYTES;
        // Index mask applied when folding capacity words into the rate (-1 means "no wrapping").
        private readonly int CAP_MASK;

        // Constants XORed into the last state word before the final block:
        // _A0/_A1 for a padded/full last associated-data block, _M2/_M3 for a padded/full last message block.
        private readonly uint _A0;
        private readonly uint _A1;
        private readonly uint _M2;
        private readonly uint _M3;

        /// <summary>Name of the selected parameter set, e.g. "SCHWAEMM128-128".</summary>
        public string AlgorithmName => algorithmName;

        /// <summary>Creates an engine for the given parameter set. <see cref="Init"/> must be called before use.</summary>
        /// <param name="sparkleParameters">The SCHWAEMM variant to implement.</param>
        /// <exception cref="ArgumentException">If <paramref name="sparkleParameters"/> is not a defined value.</exception>
        public SparkleEngine(SparkleParameters sparkleParameters)
        {
            int tagBits;
            int stateBits;
            int capacityBits;

            switch (sparkleParameters)
            {
            case SparkleParameters.SCHWAEMM128_128:
                SCHWAEMM_KEY_LEN = 128;
                SCHWAEMM_NONCE_LEN = 128;
                tagBits = 128;
                stateBits = 256;
                capacityBits = 128;
                SPARKLE_STEPS_SLIM = 7;
                SPARKLE_STEPS_BIG = 10;
                algorithmName = "SCHWAEMM128-128";
                break;
            case SparkleParameters.SCHWAEMM256_128:
                SCHWAEMM_KEY_LEN = 128;
                SCHWAEMM_NONCE_LEN = 256;
                tagBits = 128;
                stateBits = 384;
                capacityBits = 128;
                SPARKLE_STEPS_SLIM = 7;
                SPARKLE_STEPS_BIG = 11;
                algorithmName = "SCHWAEMM256-128";
                break;
            case SparkleParameters.SCHWAEMM192_192:
                SCHWAEMM_KEY_LEN = 192;
                SCHWAEMM_NONCE_LEN = 192;
                tagBits = 192;
                stateBits = 384;
                capacityBits = 192;
                SPARKLE_STEPS_SLIM = 7;
                SPARKLE_STEPS_BIG = 11;
                algorithmName = "SCHWAEMM192-192";
                break;
            case SparkleParameters.SCHWAEMM256_256:
                SCHWAEMM_KEY_LEN = 256;
                SCHWAEMM_NONCE_LEN = 256;
                tagBits = 256;
                stateBits = 512;
                capacityBits = 256;
                SPARKLE_STEPS_SLIM = 8;
                SPARKLE_STEPS_BIG = 12;
                algorithmName = "SCHWAEMM256-256";
                break;
            default:
                throw new ArgumentException("Invalid definition of SCHWAEMM instance");
            }

            KEY_WORDS = SCHWAEMM_KEY_LEN >> 5;
            KEY_BYTES = SCHWAEMM_KEY_LEN >> 3;
            TAG_WORDS = tagBits >> 5;
            TAG_BYTES = tagBits >> 3;
            STATE_WORDS = stateBits >> 5;
            // The rate has the same size as the nonce.
            RATE_WORDS = SCHWAEMM_NONCE_LEN >> 5;
            RATE_BYTES = SCHWAEMM_NONCE_LEN >> 3;

            int capacityBranches = capacityBits >> 6;
            int capacityWords = capacityBits >> 5;
            CAP_MASK = (RATE_WORDS > capacityWords) ? (capacityWords - 1) : -1;

            int branchBit = 1 << capacityBranches;
            _A0 = (uint)(branchBit << 24);
            _A1 = (uint)((1 ^ branchBit) << 24);
            _M2 = (uint)((2 ^ branchBit) << 24);
            _M3 = (uint)((3 ^ branchBit) << 24);

            state = new uint[STATE_WORDS];
            k = new uint[KEY_WORDS];
            npub = new uint[RATE_WORDS];

            m_bufferSizeDecrypt = RATE_BYTES + TAG_BYTES;
            m_buf = new byte[m_bufferSizeDecrypt];
        }

        /// <summary>Returns the required key length in bytes.</summary>
        public int GetKeyBytesSize()
        {
            return KEY_BYTES;
        }

        /// <summary>Returns the required nonce (IV) length in bytes.</summary>
        public int GetIVBytesSize()
        {
            return RATE_BYTES;
        }

        /// <summary>Loads key and nonce and arms the engine for encryption or decryption.</summary>
        /// <param name="forEncryption">True to encrypt, false to decrypt.</param>
        /// <param name="parameters">
        /// Either <see cref="AeadParameters"/> (its MAC size must equal the tag size in bits) or a
        /// <see cref="ParametersWithIV"/> wrapping a <see cref="KeyParameter"/>.
        /// </param>
        /// <exception cref="ArgumentException">
        /// If the parameter type is unsupported, the key is missing, or the MAC/key/nonce size is wrong.
        /// </exception>
        public void Init(bool forEncryption, ICipherParameters parameters)
        {
            KeyParameter key;
            ReadOnlySpan<byte> iv;

            if (parameters is AeadParameters aeadParameters)
            {
                key = aeadParameters.Key;
                iv = aeadParameters.Nonce;
                initialAssociatedText = aeadParameters.GetAssociatedText();

                int macSizeBits = aeadParameters.MacSize;
                if (macSizeBits != TAG_BYTES * 8)
                    throw new ArgumentException("Invalid value for MAC size: " + macSizeBits.ToString());
            }
            else if (parameters is ParametersWithIV withIV)
            {
                key = withIV.Parameters as KeyParameter;
                iv = withIV.IV;
                initialAssociatedText = null;
            }
            else
            {
                throw new ArgumentException("invalid parameters passed to Sparkle");
            }

            if (key == null)
                throw new ArgumentException("Sparkle Init parameters must include a key");

            int expectedKeyLength = KEY_WORDS * 4;
            if (expectedKeyLength != key.KeyLength)
                throw new ArgumentException(algorithmName + " requires exactly " + expectedKeyLength.ToString() + " bytes of key");

            int expectedIVLength = RATE_WORDS * 4;
            if (expectedIVLength != iv.Length)
                throw new ArgumentException(algorithmName + " requires exactly " + expectedIVLength.ToString() + " bytes of IV");

            Pack.LE_To_UInt32(key.Key, k);
            Pack.LE_To_UInt32(iv, npub);

            m_state = forEncryption ? State.EncInit : State.DecInit;

            Reset();
        }

        /// <summary>Adds one byte of associated data.</summary>
        /// <exception cref="InvalidOperationException">If the engine is not in a state that accepts associated data.</exception>
        public void ProcessAadByte(byte input)
        {
            CheckAad();

            // A full buffer is only flushed when more data arrives, so that the last block
            // (full or not) is still available to ProcessFinalAad.
            if (m_bufPos == RATE_BYTES)
            {
                ProcessBufferAad(m_buf);
                m_bufPos = 0;
            }

            m_buf[m_bufPos++] = input;
        }

        /// <summary>Adds <paramref name="len"/> bytes of associated data starting at <paramref name="inOff"/>.</summary>
        public void ProcessAadBytes(byte[] inBytes, int inOff, int len)
        {
            Check.DataLength(inBytes, inOff, len, "input buffer too short");

            ProcessAadBytes(inBytes.AsSpan(inOff, len));
        }

        /// <summary>Adds associated data. An empty span is ignored and does not change the engine state.</summary>
        /// <exception cref="InvalidOperationException">If the engine is not in a state that accepts associated data.</exception>
        public void ProcessAadBytes(ReadOnlySpan<byte> input)
        {
            if (input.IsEmpty)
                return;

            CheckAad();

            if (m_bufPos > 0)
            {
                int available = RATE_BYTES - m_bufPos;
                if (input.Length <= available)
                {
                    input.CopyTo(m_buf.AsSpan(m_bufPos));
                    m_bufPos += input.Length;
                    return;
                }

                input.Slice(0, available).CopyTo(m_buf.AsSpan(m_bufPos));
                input = input.Slice(available);

                ProcessBufferAad(m_buf);
            }

            // Strictly greater-than: the final block always stays buffered for ProcessFinalAad.
            while (input.Length > RATE_BYTES)
            {
                ProcessBufferAad(input);
                input = input.Slice(RATE_BYTES);
            }

            input.CopyTo(m_buf);
            m_bufPos = input.Length;
        }

        /// <summary>Processes one message byte; see <see cref="ProcessBytes(ReadOnlySpan{byte}, Span{byte})"/>.</summary>
        public int ProcessByte(byte input, byte[] outBytes, int outOff)
        {
            return ProcessByte(input, Spans.FromNullable(outBytes, outOff));
        }

        /// <summary>Processes one message byte; see <see cref="ProcessBytes(ReadOnlySpan{byte}, Span{byte})"/>.</summary>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        public int ProcessByte(byte input, Span<byte> output)
        {
            // A stack-allocated span needs no unsafe context.
            Span<byte> single = stackalloc byte[1];
            single[0] = input;

            return ProcessBytes(single, output);
        }

        /// <summary>Processes <paramref name="len"/> message bytes starting at <paramref name="inOff"/>.</summary>
        /// <returns>The number of bytes written to <paramref name="outBytes"/>.</returns>
        public int ProcessBytes(byte[] inBytes, int inOff, int len, byte[] outBytes, int outOff)
        {
            Check.DataLength(inBytes, inOff, len, "input buffer too short");

            return ProcessBytes(inBytes.AsSpan(inOff, len), Spans.FromNullable(outBytes, outOff));
        }

        /// <summary>
        /// Encrypts or decrypts message data. Output is produced in whole rate-sized blocks only; the
        /// remainder (and, when decrypting, enough bytes to hold the tag) stays buffered until more input
        /// arrives or <see cref="DoFinal(byte[], int)"/> is called.
        /// </summary>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        /// <exception cref="InvalidOperationException">If the engine is uninitialised or already finalised.</exception>
        public int ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
        {
            bool forEncryption = CheckData();

            int resultLength = 0;

            if (forEncryption)
            {
                if (m_bufPos > 0)
                {
                    int available = RATE_BYTES - m_bufPos;
                    if (input.Length <= available)
                    {
                        input.CopyTo(m_buf.AsSpan(m_bufPos));
                        m_bufPos += input.Length;
                        return 0;
                    }

                    input.Slice(0, available).CopyTo(m_buf.AsSpan(m_bufPos));
                    input = input.Slice(available);

                    ProcessBufferEncrypt(m_buf, output);
                    resultLength = RATE_BYTES;
                }

                while (input.Length > RATE_BYTES)
                {
                    ProcessBufferEncrypt(input, output.Slice(resultLength));
                    input = input.Slice(RATE_BYTES);
                    resultLength += RATE_BYTES;
                }
            }
            else
            {
                int available = m_bufferSizeDecrypt - m_bufPos;
                if (input.Length <= available)
                {
                    input.CopyTo(m_buf.AsSpan(m_bufPos));
                    m_bufPos += input.Length;
                    return 0;
                }

                if (m_bufPos > RATE_BYTES)
                {
                    // More than one block is buffered: decrypt the first block and shift the rest down.
                    ProcessBufferDecrypt(m_buf, output);
                    m_bufPos -= RATE_BYTES;
                    m_buf.AsSpan(0, m_bufPos).CopyFrom(m_buf.AsSpan(RATE_BYTES));
                    resultLength = RATE_BYTES;

                    available += RATE_BYTES;
                    if (input.Length <= available)
                    {
                        input.CopyTo(m_buf.AsSpan(m_bufPos));
                        m_bufPos += input.Length;
                        return resultLength;
                    }
                }

                // Top the buffer up to exactly one block and decrypt it.
                available = RATE_BYTES - m_bufPos;
                input.Slice(0, available).CopyTo(m_buf.AsSpan(m_bufPos));
                input = input.Slice(available);
                ProcessBufferDecrypt(m_buf, output.Slice(resultLength));
                resultLength += RATE_BYTES;

                // Keep at least a block plus a tag unprocessed: the tail may be the tag.
                while (input.Length > m_bufferSizeDecrypt)
                {
                    ProcessBufferDecrypt(input, output.Slice(resultLength));
                    input = input.Slice(RATE_BYTES);
                    resultLength += RATE_BYTES;
                }
            }

            input.CopyTo(m_buf);
            m_bufPos = input.Length;

            return resultLength;
        }

        /// <summary>
        /// Processes the buffered final block and then either appends the tag (encryption) or compares the
        /// computed tag with the buffered trailing tag (decryption).
        /// </summary>
        /// <remarks>
        /// When decrypting, the final plaintext bytes are written to <paramref name="outBytes"/> before the
        /// tag comparison; callers must discard all output if <see cref="InvalidCipherTextException"/> is thrown.
        /// </remarks>
        /// <returns>The number of bytes written to <paramref name="outBytes"/>.</returns>
        /// <exception cref="InvalidCipherTextException">
        /// If fewer bytes than a tag are buffered when decrypting, or the tag does not match.
        /// </exception>
        /// <exception cref="InvalidOperationException">If the engine is uninitialised or already finalised.</exception>
        public int DoFinal(byte[] outBytes, int outOff)
        {
            bool forEncryption = CheckData();

            int resultLength;
            if (forEncryption)
            {
                resultLength = m_bufPos + TAG_BYTES;
            }
            else
            {
                if (m_bufPos < TAG_BYTES)
                    throw new InvalidCipherTextException("data too short");

                // The last TAG_BYTES of the buffer are the received tag, not ciphertext.
                m_bufPos -= TAG_BYTES;
                resultLength = m_bufPos;
            }

            Check.OutputLength(outBytes, outOff, resultLength, "output buffer too short");

            if (encrypted || m_bufPos > 0)
            {
                state[STATE_WORDS - 1] ^= (m_bufPos < RATE_BYTES) ? _M2 : _M3;

                // The last block may need padding, so it is always packed into a scratch word buffer.
                uint[] block = new uint[RATE_WORDS];
                for (int i = 0; i < m_bufPos; i++)
                {
                    block[i >> 2] |= (uint)m_buf[i] << ((i & 3) << 3);
                }

                if (m_bufPos < RATE_BYTES)
                {
                    if (!forEncryption)
                    {
                        // Fill the bytes beyond the ciphertext with the corresponding state bytes.
                        int shift = (m_bufPos & 3) << 3;
                        block[m_bufPos >> 2] |= state[m_bufPos >> 2] >> shift << shift;

                        int nextWord = (m_bufPos >> 2) + 1;
                        Array.Copy(state, nextWord, block, nextWord, RATE_WORDS - nextWord);
                    }

                    // Padding marker directly after the data.
                    block[m_bufPos >> 2] ^= 0x80U << ((m_bufPos & 3) << 3);
                }

                int halfRate = RATE_WORDS / 2;
                for (int lo = 0; lo < halfRate; lo++)
                {
                    int hi = lo + halfRate;
                    uint stateLo = state[lo];
                    uint stateHi = state[hi];

                    if (forEncryption)
                    {
                        state[lo] = stateHi ^ block[lo] ^ state[RATE_WORDS + lo];
                        state[hi] = stateLo ^ stateHi ^ block[hi] ^ state[RATE_WORDS + (hi & CAP_MASK)];
                    }
                    else
                    {
                        state[lo] = stateLo ^ stateHi ^ block[lo] ^ state[RATE_WORDS + lo];
                        state[hi] = stateLo ^ block[hi] ^ state[RATE_WORDS + (hi & CAP_MASK)];
                    }

                    block[lo] ^= stateLo;
                    block[hi] ^= stateHi;
                }

                for (int i = 0; i < m_bufPos; i++)
                {
                    outBytes[outOff++] = (byte)(block[i >> 2] >> ((i & 3) << 3));
                }

                SparkleOpt(state, SPARKLE_STEPS_BIG);
            }

            // Fold the key into the capacity part; the tag is read from there.
            for (int i = 0; i < KEY_WORDS; i++)
            {
                state[RATE_WORDS + i] ^= k[i];
            }

            tag = new byte[TAG_BYTES];
            Pack.UInt32_To_LE(state, RATE_WORDS, TAG_WORDS, tag, 0);

            if (forEncryption)
            {
                Array.Copy(tag, 0, outBytes, outOff, TAG_BYTES);
            }
            else if (!Arrays.FixedTimeEquals(TAG_BYTES, tag, 0, m_buf, m_bufPos))
            {
                throw new InvalidCipherTextException("mac check in " + AlgorithmName + " failed");
            }

            // Keep the tag available via GetMac() after encryption; clear it after decryption.
            Reset(!forEncryption);

            return resultLength;
        }

        /// <summary>Span overload of <see cref="DoFinal(byte[], int)"/>; finalises into a temporary array and copies.</summary>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        public int DoFinal(Span<byte> output)
        {
            byte[] result = new byte[GetOutputSize(0)];
            int length = DoFinal(result, 0);
            result.AsSpan(0, length).CopyTo(output);
            return length;
        }

        /// <summary>Returns the tag array from the last encryption, or null if it has been cleared.</summary>
        public byte[] GetMac()
        {
            return tag;
        }

        /// <summary>
        /// Returns how many bytes a ProcessBytes call with <paramref name="len"/> input bytes would
        /// output in the current state (always a multiple of the rate size).
        /// </summary>
        public int GetUpdateOutputSize(int len)
        {
            // The -1 accounts for a full buffer only being processed once further input arrives.
            int total = System.Math.Max(0, len) - 1;

            switch (m_state)
            {
            case State.DecInit:
            case State.DecAad:
                total = System.Math.Max(0, total - TAG_BYTES);
                break;
            case State.DecData:
            case State.DecFinal:
                total = System.Math.Max(0, total + m_bufPos - TAG_BYTES);
                break;
            case State.EncData:
            case State.EncFinal:
                total = System.Math.Max(0, total + m_bufPos);
                break;
            }

            return total - total % RATE_BYTES;
        }

        /// <summary>
        /// Returns the output size needed for <paramref name="len"/> more input bytes followed by
        /// DoFinal, given the current state and buffered data.
        /// </summary>
        public int GetOutputSize(int len)
        {
            int total = System.Math.Max(0, len);

            switch (m_state)
            {
            case State.DecInit:
            case State.DecAad:
                return System.Math.Max(0, total - TAG_BYTES);
            case State.DecData:
            case State.DecFinal:
                return System.Math.Max(0, total + m_bufPos - TAG_BYTES);
            case State.EncData:
            case State.EncFinal:
                return total + m_bufPos + TAG_BYTES;
            default:
                return total + TAG_BYTES;
            }
        }

        /// <summary>Clears buffered data and the stored tag; see <see cref="Reset(bool)"/> for state handling.</summary>
        /// <exception cref="InvalidOperationException">If the engine has never been initialised.</exception>
        public void Reset()
        {
            Reset(true);
        }

        /// <summary>Moves an *Init state to the matching *Aad state; rejects states that cannot take associated data.</summary>
        private void CheckAad()
        {
            switch (m_state)
            {
            case State.DecInit:
                m_state = State.DecAad;
                break;
            case State.EncInit:
                m_state = State.EncAad;
                break;
            case State.DecAad:
            case State.EncAad:
                break;
            case State.EncFinal:
                throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption");
            default:
                throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
            }
        }

        /// <summary>
        /// Ensures the engine is in a data state, finishing the associated-data phase if needed.
        /// </summary>
        /// <returns>True when encrypting, false when decrypting.</returns>
        private bool CheckData()
        {
            switch (m_state)
            {
            case State.DecInit:
            case State.DecAad:
                FinishAad(State.DecData);
                return false;
            case State.EncInit:
            case State.EncAad:
                FinishAad(State.EncData);
                return true;
            case State.DecData:
                return false;
            case State.EncData:
                return true;
            case State.EncFinal:
                throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption");
            default:
                throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
            }
        }

        /// <summary>Absorbs the last associated-data block (only if any was supplied) and enters <paramref name="nextState"/>.</summary>
        private void FinishAad(State nextState)
        {
            if (m_state == State.EncAad || m_state == State.DecAad)
            {
                ProcessFinalAad();
            }

            m_bufPos = 0;
            m_state = nextState;
        }

        /// <summary>Absorbs one full, non-final block of associated data read from the start of <paramref name="buffer"/>.</summary>
        private void ProcessBufferAad(ReadOnlySpan<byte> buffer)
        {
            AbsorbAadBlock(buffer, SPARKLE_STEPS_SLIM);
        }

        /// <summary>
        /// Mixes one rate-sized block into the rate words (each word also receives a capacity word),
        /// then runs the permutation for <paramref name="steps"/> steps.
        /// </summary>
        private void AbsorbAadBlock(ReadOnlySpan<byte> buffer, int steps)
        {
            int halfRate = RATE_WORDS / 2;
            for (int lo = 0; lo < halfRate; lo++)
            {
                int hi = lo + halfRate;
                uint stateLo = state[lo];
                uint stateHi = state[hi];
                uint dataLo = Pack.LE_To_UInt32(buffer, lo * 4);
                uint dataHi = Pack.LE_To_UInt32(buffer, hi * 4);

                state[lo] = stateHi ^ dataLo ^ state[RATE_WORDS + lo];
                state[hi] = stateLo ^ stateHi ^ dataHi ^ state[RATE_WORDS + (hi & CAP_MASK)];
            }

            SparkleOpt(state, steps);
        }

        /// <summary>Decrypts one full, non-final block from <paramref name="buffer"/> into <paramref name="output"/>.</summary>
        private void ProcessBufferDecrypt(ReadOnlySpan<byte> buffer, Span<byte> output)
        {
            Check.OutputLength(output, RATE_BYTES, "output buffer too short");

            int halfRate = RATE_WORDS / 2;
            for (int lo = 0; lo < halfRate; lo++)
            {
                int hi = lo + halfRate;
                uint stateLo = state[lo];
                uint stateHi = state[hi];
                uint dataLo = Pack.LE_To_UInt32(buffer, lo * 4);
                uint dataHi = Pack.LE_To_UInt32(buffer, hi * 4);

                state[lo] = stateLo ^ stateHi ^ dataLo ^ state[RATE_WORDS + lo];
                state[hi] = stateLo ^ dataHi ^ state[RATE_WORDS + (hi & CAP_MASK)];

                Pack.UInt32_To_LE(dataLo ^ stateLo, output, lo * 4);
                Pack.UInt32_To_LE(dataHi ^ stateHi, output, hi * 4);
            }

            SparkleOpt(state, SPARKLE_STEPS_SLIM);

            // The flag records "message data has been processed" for both directions.
            encrypted = true;
        }

        /// <summary>Encrypts one full, non-final block from <paramref name="buffer"/> into <paramref name="output"/>.</summary>
        private void ProcessBufferEncrypt(ReadOnlySpan<byte> buffer, Span<byte> output)
        {
            Check.OutputLength(output, RATE_BYTES, "output buffer too short");

            int halfRate = RATE_WORDS / 2;
            for (int lo = 0; lo < halfRate; lo++)
            {
                int hi = lo + halfRate;
                uint stateLo = state[lo];
                uint stateHi = state[hi];
                uint dataLo = Pack.LE_To_UInt32(buffer, lo * 4);
                uint dataHi = Pack.LE_To_UInt32(buffer, hi * 4);

                state[lo] = stateHi ^ dataLo ^ state[RATE_WORDS + lo];
                state[hi] = stateLo ^ stateHi ^ dataHi ^ state[RATE_WORDS + (hi & CAP_MASK)];

                Pack.UInt32_To_LE(dataLo ^ stateLo, output, lo * 4);
                Pack.UInt32_To_LE(dataHi ^ stateHi, output, hi * 4);
            }

            SparkleOpt(state, SPARKLE_STEPS_SLIM);

            encrypted = true;
        }

        /// <summary>Pads (if short) and absorbs the last associated-data block using the big step count.</summary>
        private void ProcessFinalAad()
        {
            if (m_bufPos < RATE_BYTES)
            {
                state[STATE_WORDS - 1] ^= _A0;

                // Padding: a single 0x80 byte followed by zeros up to the rate size.
                m_buf[m_bufPos] = 0x80;
                while (++m_bufPos < RATE_BYTES)
                {
                    m_buf[m_bufPos] = 0x00;
                }
            }
            else
            {
                state[STATE_WORDS - 1] ^= _A1;
            }

            AbsorbAadBlock(m_buf, SPARKLE_STEPS_BIG);
        }

        /// <summary>
        /// Clears the buffer (and optionally the tag) and, unless an encryption has just been used,
        /// reloads nonce and key into the state and re-absorbs the initial associated text.
        /// </summary>
        /// <param name="clearMac">True to forget the last tag.</param>
        /// <exception cref="InvalidOperationException">If the engine has never been initialised.</exception>
        private void Reset(bool clearMac)
        {
            if (clearMac)
            {
                tag = null;
            }

            Arrays.Clear(m_buf);
            m_bufPos = 0;
            encrypted = false;

            switch (m_state)
            {
            case State.DecInit:
            case State.EncInit:
                break;
            case State.DecAad:
            case State.DecData:
            case State.DecFinal:
                m_state = State.DecInit;
                break;
            case State.EncAad:
            case State.EncData:
            case State.EncFinal:
                // A used encryptor must be re-initialised; it is deliberately not re-armed here.
                m_state = State.EncFinal;
                return;
            default:
                throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
            }

            // Nonce goes into the rate part, key into the capacity part.
            Array.Copy(npub, 0, state, 0, RATE_WORDS);
            Array.Copy(k, 0, state, RATE_WORDS, KEY_WORDS);

            SparkleOpt(state, SPARKLE_STEPS_BIG);

            if (initialAssociatedText != null)
            {
                ProcessAadBytes(initialAssociatedText);
            }
        }

        /// <summary>Four add-rotate-xor rounds on one pair of state words, keyed by constant <paramref name="rc"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void ArxBox(uint rc, ref uint s00, ref uint s01)
        {
            s00 += Integers.RotateRight(s01, 31);
            s01 ^= Integers.RotateRight(s00, 24);
            s00 ^= rc;

            s00 += Integers.RotateRight(s01, 17);
            s01 ^= Integers.RotateRight(s00, 17);
            s00 ^= rc;

            s00 += s01;
            s01 ^= Integers.RotateRight(s00, 31);
            s00 ^= rc;

            s00 += Integers.RotateRight(s01, 24);
            s01 ^= Integers.RotateRight(s00, 16);
            s00 ^= rc;
        }

        /// <summary>Linear helper: the word rotated by 16 bits, XORed with its low 16 bits.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint ELL(uint x)
        {
            return Integers.RotateRight(x, 16) ^ (x & 0xFFFFU);
        }

        /// <summary>Runs the permutation matching the length of <paramref name="state"/> (8, 12 or 16 words).</summary>
        /// <exception cref="InvalidOperationException">If the state has any other length.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void SparkleOpt(uint[] state, int steps)
        {
            switch (state.Length)
            {
            case 8:
                SparkleOpt8(state, steps);
                break;
            case 12:
                SparkleOpt12(state, steps);
                break;
            case 16:
                SparkleOpt16(state, steps);
                break;
            default:
                throw new InvalidOperationException();
            }
        }

        /// <summary>Applies <paramref name="steps"/> steps of the permutation to an 8-word state, in place.</summary>
        internal static void SparkleOpt8(uint[] state, int steps)
        {
            uint s0 = state[0];
            uint s1 = state[1];
            uint s2 = state[2];
            uint s3 = state[3];
            uint s4 = state[4];
            uint s5 = state[5];
            uint s6 = state[6];
            uint s7 = state[7];

            for (int step = 0; step < steps; step++)
            {
                // Step constant and step counter.
                s1 ^= RCON[step & 7];
                s3 ^= (uint)step;

                // ARX-box layer.
                ArxBox(RCON[0], ref s0, ref s1);
                ArxBox(RCON[1], ref s2, ref s3);
                ArxBox(RCON[2], ref s4, ref s5);
                ArxBox(RCON[3], ref s6, ref s7);

                // Linear layer: ELL of the even/odd words of the first half is crossed into the
                // odd/even words of the new first half; the old first half becomes the second half.
                uint tx = ELL(s0 ^ s2);
                uint ty = ELL(s1 ^ s3);

                uint u0 = s0 ^ s4;
                uint u1 = s1 ^ s5;
                uint u2 = s2 ^ s6;
                uint u3 = s3 ^ s7;

                s4 = s0;
                s5 = s1;
                s6 = s2;
                s7 = s3;

                s0 = u2 ^ ty;
                s1 = u3 ^ tx;
                s2 = u0 ^ ty;
                s3 = u1 ^ tx;
            }

            state[0] = s0;
            state[1] = s1;
            state[2] = s2;
            state[3] = s3;
            state[4] = s4;
            state[5] = s5;
            state[6] = s6;
            state[7] = s7;
        }

        /// <summary>Applies <paramref name="steps"/> steps of the permutation to a 12-word state, in place.</summary>
        internal static void SparkleOpt12(uint[] state, int steps)
        {
            uint s0 = state[0];
            uint s1 = state[1];
            uint s2 = state[2];
            uint s3 = state[3];
            uint s4 = state[4];
            uint s5 = state[5];
            uint s6 = state[6];
            uint s7 = state[7];
            uint s8 = state[8];
            uint s9 = state[9];
            uint s10 = state[10];
            uint s11 = state[11];

            for (int step = 0; step < steps; step++)
            {
                // Step constant and step counter.
                s1 ^= RCON[step & 7];
                s3 ^= (uint)step;

                // ARX-box layer.
                ArxBox(RCON[0], ref s0, ref s1);
                ArxBox(RCON[1], ref s2, ref s3);
                ArxBox(RCON[2], ref s4, ref s5);
                ArxBox(RCON[3], ref s6, ref s7);
                ArxBox(RCON[4], ref s8, ref s9);
                ArxBox(RCON[5], ref s10, ref s11);

                // Linear layer (same structure as in SparkleOpt8, with three pairs per half).
                uint tx = ELL(s0 ^ s2 ^ s4);
                uint ty = ELL(s1 ^ s3 ^ s5);

                uint u0 = s0 ^ s6;
                uint u1 = s1 ^ s7;
                uint u2 = s2 ^ s8;
                uint u3 = s3 ^ s9;
                uint u4 = s4 ^ s10;
                uint u5 = s5 ^ s11;

                s6 = s0;
                s7 = s1;
                s8 = s2;
                s9 = s3;
                s10 = s4;
                s11 = s5;

                s0 = u2 ^ ty;
                s1 = u3 ^ tx;
                s2 = u4 ^ ty;
                s3 = u5 ^ tx;
                s4 = u0 ^ ty;
                s5 = u1 ^ tx;
            }

            state[0] = s0;
            state[1] = s1;
            state[2] = s2;
            state[3] = s3;
            state[4] = s4;
            state[5] = s5;
            state[6] = s6;
            state[7] = s7;
            state[8] = s8;
            state[9] = s9;
            state[10] = s10;
            state[11] = s11;
        }

        /// <summary>
        /// Applies <paramref name="steps"/> steps of the permutation to a 16-word state, in place, using
        /// SSE2 when enabled and a scalar implementation otherwise.
        /// </summary>
        /// <remarks>
        /// The scalar path is unrolled two steps per iteration. It leaves the loop after the first half
        /// when <paramref name="steps"/> is odd, so both paths perform exactly <paramref name="steps"/> steps.
        /// </remarks>
        internal static void SparkleOpt16(uint[] state, int steps)
        {
            if (BcSse2.IsEnabled)
            {
                // De-interleave: even-indexed and odd-indexed words of each half get their own vector.
                Vector128<uint> evenLo = Vector128.Create(state[0], state[2], state[4], state[6]);
                Vector128<uint> oddLo = Vector128.Create(state[1], state[3], state[5], state[7]);
                Vector128<uint> evenHi = Vector128.Create(state[8], state[10], state[12], state[14]);
                Vector128<uint> oddHi = Vector128.Create(state[9], state[11], state[13], state[15]);

                Vector128<uint> rcLo = Load128(RCON.AsSpan(0));
                Vector128<uint> rcHi = Load128(RCON.AsSpan(4));

                for (int step = 0; step < steps; step++)
                {
                    // Step constant into word 1, step counter into word 3.
                    oddLo = CoreSse2.Xor(oddLo, Vector128.Create(RCON[step & 7], (uint)step, 0U, 0U));

                    ArxBox(rcLo, ref evenLo, ref oddLo);
                    ArxBox(rcHi, ref evenHi, ref oddHi);

                    Vector128<uint> tx = ELL(HorizontalXor(evenLo));
                    Vector128<uint> ty = ELL(HorizontalXor(oddLo));

                    Vector128<uint> mixedEven = CoreSse2.Xor(evenLo, evenHi);
                    Vector128<uint> mixedOdd = CoreSse2.Xor(oddLo, oddHi);

                    evenHi = evenLo;
                    oddHi = oddLo;

                    // 0x39 rotates the four lanes down by one position.
                    evenLo = CoreSse2.Xor(ty, CoreSse2.Shuffle(mixedEven, 0x39));
                    oddLo = CoreSse2.Xor(tx, CoreSse2.Shuffle(mixedOdd, 0x39));
                }

                // Re-interleave even/odd lanes back into state order.
                Store128(CoreSse2.UnpackLow(evenLo, oddLo), state.AsSpan(0));
                Store128(CoreSse2.UnpackHigh(evenLo, oddLo), state.AsSpan(4));
                Store128(CoreSse2.UnpackLow(evenHi, oddHi), state.AsSpan(8));
                Store128(CoreSse2.UnpackHigh(evenHi, oddHi), state.AsSpan(12));
                return;
            }

            uint s0 = state[0];
            uint s1 = state[1];
            uint s2 = state[2];
            uint s3 = state[3];
            uint s4 = state[4];
            uint s5 = state[5];
            uint s6 = state[6];
            uint s7 = state[7];
            uint s8 = state[8];
            uint s9 = state[9];
            uint s10 = state[10];
            uint s11 = state[11];
            uint s12 = state[12];
            uint s13 = state[13];
            uint s14 = state[14];
            uint s15 = state[15];

            int step = 0;
            while (step < steps)
            {
                // ---- First step of the pair: s0..s7 is the first half, s8..s15 the second. ----
                s1 ^= RCON[step & 7];
                s3 ^= (uint)step;
                step++;

                ArxBox(RCON[0], ref s0, ref s1);
                ArxBox(RCON[1], ref s2, ref s3);
                ArxBox(RCON[2], ref s4, ref s5);
                ArxBox(RCON[3], ref s6, ref s7);
                ArxBox(RCON[4], ref s8, ref s9);
                ArxBox(RCON[5], ref s10, ref s11);
                ArxBox(RCON[6], ref s12, ref s13);
                ArxBox(RCON[7], ref s14, ref s15);

                uint tx = ELL(s0 ^ s2 ^ s4 ^ s6);
                uint ty = ELL(s1 ^ s3 ^ s5 ^ s7);

                uint old8 = s8;
                uint old9 = s9;

                // Instead of moving words, the roles of the halves are swapped:
                // s8..s15 now hold the new first half, s0..s7 the new second half.
                s8 = s2 ^ s10 ^ ty;
                s9 = s3 ^ s11 ^ tx;
                s10 = s4 ^ s12 ^ ty;
                s11 = s5 ^ s13 ^ tx;
                s12 = s6 ^ s14 ^ ty;
                s13 = s7 ^ s15 ^ tx;
                s14 = s0 ^ old8 ^ ty;
                s15 = s1 ^ old9 ^ tx;

                if (step >= steps)
                {
                    // Odd step count: stop here and store with the halves in their swapped roles.
                    state[0] = s8;
                    state[1] = s9;
                    state[2] = s10;
                    state[3] = s11;
                    state[4] = s12;
                    state[5] = s13;
                    state[6] = s14;
                    state[7] = s15;
                    state[8] = s0;
                    state[9] = s1;
                    state[10] = s2;
                    state[11] = s3;
                    state[12] = s4;
                    state[13] = s5;
                    state[14] = s6;
                    state[15] = s7;
                    return;
                }

                // ---- Second step of the pair: s8..s15 is the first half, s0..s7 the second. ----
                s9 ^= RCON[step & 7];
                s11 ^= (uint)step;
                step++;

                ArxBox(RCON[0], ref s8, ref s9);
                ArxBox(RCON[1], ref s10, ref s11);
                ArxBox(RCON[2], ref s12, ref s13);
                ArxBox(RCON[3], ref s14, ref s15);
                ArxBox(RCON[4], ref s0, ref s1);
                ArxBox(RCON[5], ref s2, ref s3);
                ArxBox(RCON[6], ref s4, ref s5);
                ArxBox(RCON[7], ref s6, ref s7);

                uint tx2 = ELL(s8 ^ s10 ^ s12 ^ s14);
                uint ty2 = ELL(s9 ^ s11 ^ s13 ^ s15);

                uint old0 = s0;
                uint old1 = s1;

                // Roles swap back: s0..s7 is the first half again.
                s0 = s2 ^ s10 ^ ty2;
                s1 = s3 ^ s11 ^ tx2;
                s2 = s4 ^ s12 ^ ty2;
                s3 = s5 ^ s13 ^ tx2;
                s4 = s6 ^ s14 ^ ty2;
                s5 = s7 ^ s15 ^ tx2;
                s6 = old0 ^ s8 ^ ty2;
                s7 = old1 ^ s9 ^ tx2;
            }

            state[0] = s0;
            state[1] = s1;
            state[2] = s2;
            state[3] = s3;
            state[4] = s4;
            state[5] = s5;
            state[6] = s6;
            state[7] = s7;
            state[8] = s8;
            state[9] = s9;
            state[10] = s10;
            state[11] = s11;
            state[12] = s12;
            state[13] = s13;
            state[14] = s14;
            state[15] = s15;
        }

        /// <summary>
        /// Four-lane version of the scalar ArxBox. Each rotation is applied as two shifts; the two shifted
        /// parts share no bits, so adding (or XORing) them one after the other equals using the rotated word.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void ArxBox(Vector128<uint> rc, ref Vector128<uint> s00, ref Vector128<uint> s01)
        {
            // s00 += s01 >>> 31
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftRightLogical(s01, 31));
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftLeftLogical(s01, 1));
            // s01 ^= s00 >>> 24
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftRightLogical(s00, 24));
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftLeftLogical(s00, 8));
            s00 = CoreSse2.Xor(s00, rc);

            // s00 += s01 >>> 17
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftRightLogical(s01, 17));
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftLeftLogical(s01, 15));
            // s01 ^= s00 >>> 17
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftRightLogical(s00, 17));
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftLeftLogical(s00, 15));
            s00 = CoreSse2.Xor(s00, rc);

            // s00 += s01
            s00 = CoreSse2.Add(s00, s01);
            // s01 ^= s00 >>> 31
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftRightLogical(s00, 31));
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftLeftLogical(s00, 1));
            s00 = CoreSse2.Xor(s00, rc);

            // s00 += s01 >>> 24
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftRightLogical(s01, 24));
            s00 = CoreSse2.Add(s00, CoreSse2.ShiftLeftLogical(s01, 8));
            // s01 ^= s00 >>> 16
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftRightLogical(s00, 16));
            s01 = CoreSse2.Xor(s01, CoreSse2.ShiftLeftLogical(s00, 16));
            s00 = CoreSse2.Xor(s00, rc);
        }

        /// <summary>Four-lane version of the scalar ELL.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<uint> ELL(Vector128<uint> x)
        {
            Vector128<uint> shiftedUp = CoreSse2.ShiftLeftLogical(x, 16);
            Vector128<uint> folded = CoreSse2.Xor(x, shiftedUp);
            return CoreSse2.Xor(shiftedUp, CoreSse2.ShiftRightLogical(folded, 16));
        }

        /// <summary>Returns a vector whose every lane is the XOR of all four lanes of <paramref name="x"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<uint> HorizontalXor(Vector128<uint> x)
        {
            // 0x1B reverses the lanes; 0xB1 swaps neighbouring lanes.
            Vector128<uint> pairs = CoreSse2.Xor(x, CoreSse2.Shuffle(x, 0x1B));
            return CoreSse2.Xor(pairs, CoreSse2.Shuffle(pairs, 0xB1));
        }

        /// <summary>Loads the first four words of <paramref name="t"/> into a vector, lane i = t[i].</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<uint> Load128(ReadOnlySpan<uint> t)
        {
            if (Vector.IsPackedLittleEndian)
                return MemoryMarshal.Read<Vector128<uint>>(MemoryMarshal.AsBytes(t));

            return Vector128.Create(t[0], t[1], t[2], t[3]);
        }

        /// <summary>Stores the four lanes of <paramref name="s"/> into the first four words of <paramref name="t"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void Store128(Vector128<uint> s, Span<uint> t)
        {
            Span<byte> destination = MemoryMarshal.AsBytes(t);

            if (Vector.IsPackedLittleEndian)
            {
                MemoryMarshal.Write(destination, ref s);
                return;
            }

            Vector128<ulong> halves = s.AsUInt64();
            BinaryPrimitives.WriteUInt64LittleEndian(destination.Slice(0, 8), halves.GetElement(0));
            BinaryPrimitives.WriteUInt64LittleEndian(destination.Slice(8), halves.GetElement(1));
        }
    }
}