SparkleEngine
Sparkle v1.2, based on the current round 3 submission (https://sparkle-lwc.github.io/).
using Org.BouncyCastle.Crypto.Modes;
using Org.BouncyCastle.Crypto.Parameters;
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Crypto.Engines
{
public sealed class SparkleEngine : IAeadCipher
{
/// <summary>
/// Selects the SCHWAEMM instance implemented by the engine. The constructor maps each member to its
/// key, nonce (rate), tag, state and capacity sizes and to the slim/big SPARKLE step counts.
/// </summary>
public enum SparkleParameters
{
// 128-bit nonce/rate, 128-bit key and tag, 256-bit state; 7 slim / 10 big steps.
SCHWAEMM128_128,
// 256-bit nonce/rate, 128-bit key and tag, 384-bit state; 7 slim / 11 big steps.
SCHWAEMM256_128,
// 192-bit nonce/rate, 192-bit key and tag, 384-bit state; 7 slim / 11 big steps.
SCHWAEMM192_192,
// 256-bit nonce/rate, 256-bit key and tag, 512-bit state; 8 slim / 12 big steps.
SCHWAEMM256_256
}
// Lifecycle of the engine. Init selects EncInit or DecInit; CheckAad and CheckData advance the
// state as associated data and payload arrive; Reset(bool) decides what follows an operation.
private enum State
{
// Default value before Init; CheckAad, CheckData and Reset throw "needs to be initialized".
Uninitialized,
// Initialised for encryption, nothing processed yet.
EncInit,
// Encryption: associated data has been supplied (entered from EncInit by CheckAad).
EncAad,
// Encryption: associated data is finished and payload is being processed (set via FinishAad).
EncData,
// Set by Reset(bool) once an encryption has left EncInit; CheckAad and CheckData then throw
// "cannot be reused for encryption", so Init must be called again.
EncFinal,
// Initialised for decryption, nothing processed yet; Reset(bool) returns here after decryption.
DecInit,
// Decryption: associated data has been supplied (entered from DecInit by CheckAad).
DecAad,
// Decryption: associated data is finished and payload is being processed (set via FinishAad).
DecData,
// Never assigned in the code shown here; treated like DecData by the size calculations and Reset.
DecFinal
}
// Eight 32-bit round constants. The SparkleOpt* permutations XOR RCON[step & 7] into the state at
// every step and hand individual entries to ArxBox as the per-branch constant.
private static readonly uint[] RCON = {
    0xB7E15162U, 0xBF715880U, 0x38B4DA56U, 0x324E7738U,
    0xBB1185EBU, 0x4F7C7B57U, 0xCFBFA1C8U, 0xC2B3293DU
};
// Display name of the selected instance (e.g. "SCHWAEMM128-128"); assigned in the constructor.
private string algorithmName;
// Permutation state, STATE_WORDS 32-bit words: the first RATE_WORDS form the rate part, the
// remaining words the capacity part.
private readonly uint[] state;
// Key words, unpacked from the key bytes by Pack.LE_To_UInt32 in Init.
private readonly uint[] k;
// Nonce words, unpacked from the IV bytes by Pack.LE_To_UInt32 in Init.
private readonly uint[] npub;
// Tag computed by the most recent DoFinal; set to null by Reset(true).
private byte[] tag;
// True once ProcessBufferEncrypt/ProcessBufferDecrypt has handled at least one full block;
// DoFinal uses it to decide whether the last-block step runs when the buffer is empty.
private bool encrypted;
// Current lifecycle state (see State).
private State m_state;
// Associated text taken from AeadParameters in Init (null for ParametersWithIV); replayed through
// ProcessAadBytes whenever Reset(bool) re-initialises the permutation state.
private byte[] initialAssociatedText;
// RATE_BYTES + TAG_BYTES: the most that is buffered while decrypting, so that the trailing
// TAG_BYTES are still in m_buf when DoFinal verifies the tag.
private readonly int m_bufferSizeDecrypt;
// Shared staging buffer for associated data and payload bytes (m_bufferSizeDecrypt long).
private readonly byte[] m_buf;
// Number of valid bytes currently held in m_buf.
private int m_bufPos;
// Key length in bits.
private readonly int SCHWAEMM_KEY_LEN;
// Nonce length in bits; RATE_WORDS and RATE_BYTES are derived from this value.
private readonly int SCHWAEMM_NONCE_LEN;
// Permutation step count used after each full associated-data or payload block.
private readonly int SPARKLE_STEPS_SLIM;
// Permutation step count used for initialisation and for the final AAD / payload block.
private readonly int SPARKLE_STEPS_BIG;
// Key length in bytes and in 32-bit words.
private readonly int KEY_BYTES;
private readonly int KEY_WORDS;
// Tag length in 32-bit words and in bytes.
private readonly int TAG_WORDS;
private readonly int TAG_BYTES;
// Number of 32-bit words in the permutation state.
private readonly int STATE_WORDS;
// Rate (block) size in 32-bit words and in bytes.
private readonly int RATE_WORDS;
private readonly int RATE_BYTES;
// Mask applied to a rate word index before it selects a capacity word: (capacity words - 1) when
// the rate is wider than the capacity, otherwise -1 so the index passes through unchanged.
private readonly int CAP_MASK;
// Constants XORed into the last state word before a final block is absorbed:
// _A0 / _A1 for a partial / full last associated-data block (ProcessFinalAad),
// _M2 / _M3 for a partial / full last payload block (DoFinal).
private readonly uint _A0;
private readonly uint _A1;
private readonly uint _M2;
private readonly uint _M3;
/// <summary>Name of the configured SCHWAEMM instance.</summary>
public string AlgorithmName => algorithmName;
/// <summary>
/// Creates an engine for the given SCHWAEMM instance and derives every size constant, the
/// domain-separation constants and the working buffers from it.
/// </summary>
/// <param name="sparkleParameters">The instance to implement.</param>
/// <exception cref="ArgumentException">The value is not a defined instance.</exception>
public SparkleEngine(SparkleParameters sparkleParameters)
{
    // All three are bit counts, like SCHWAEMM_KEY_LEN and SCHWAEMM_NONCE_LEN.
    int tagBits;
    int stateBits;
    int capacityBits;

    switch (sparkleParameters) {
    case SparkleParameters.SCHWAEMM128_128:
        algorithmName = "SCHWAEMM128-128";
        SCHWAEMM_KEY_LEN = 128;
        SCHWAEMM_NONCE_LEN = 128;
        tagBits = 128;
        stateBits = 256;
        capacityBits = 128;
        SPARKLE_STEPS_SLIM = 7;
        SPARKLE_STEPS_BIG = 10;
        break;
    case SparkleParameters.SCHWAEMM256_128:
        algorithmName = "SCHWAEMM256-128";
        SCHWAEMM_KEY_LEN = 128;
        SCHWAEMM_NONCE_LEN = 256;
        tagBits = 128;
        stateBits = 384;
        capacityBits = 128;
        SPARKLE_STEPS_SLIM = 7;
        SPARKLE_STEPS_BIG = 11;
        break;
    case SparkleParameters.SCHWAEMM192_192:
        algorithmName = "SCHWAEMM192-192";
        SCHWAEMM_KEY_LEN = 192;
        SCHWAEMM_NONCE_LEN = 192;
        tagBits = 192;
        stateBits = 384;
        capacityBits = 192;
        SPARKLE_STEPS_SLIM = 7;
        SPARKLE_STEPS_BIG = 11;
        break;
    case SparkleParameters.SCHWAEMM256_256:
        algorithmName = "SCHWAEMM256-256";
        SCHWAEMM_KEY_LEN = 256;
        SCHWAEMM_NONCE_LEN = 256;
        tagBits = 256;
        stateBits = 512;
        capacityBits = 256;
        SPARKLE_STEPS_SLIM = 8;
        SPARKLE_STEPS_BIG = 12;
        break;
    default:
        throw new ArgumentException("Invalid definition of SCHWAEMM instance");
    }

    // Bits -> 32-bit words (>> 5) and bits -> bytes (>> 3).
    KEY_WORDS = SCHWAEMM_KEY_LEN >> 5;
    KEY_BYTES = SCHWAEMM_KEY_LEN >> 3;
    TAG_WORDS = tagBits >> 5;
    TAG_BYTES = tagBits >> 3;
    STATE_WORDS = stateBits >> 5;
    RATE_WORDS = SCHWAEMM_NONCE_LEN >> 5;
    RATE_BYTES = SCHWAEMM_NONCE_LEN >> 3;

    int capacityBranches = capacityBits >> 6;
    int capacityWords = capacityBits >> 5;
    if (RATE_WORDS > capacityWords) {
        CAP_MASK = capacityWords - 1;
    } else {
        CAP_MASK = -1;
    }

    // The constants differ only in their low two bits (0..3) before being moved to the top byte.
    int branchBit = 1 << capacityBranches;
    _A0 = (uint)((0 ^ branchBit) << 24);
    _A1 = (uint)((1 ^ branchBit) << 24);
    _M2 = (uint)((2 ^ branchBit) << 24);
    _M3 = (uint)((3 ^ branchBit) << 24);

    state = new uint[STATE_WORDS];
    k = new uint[KEY_WORDS];
    npub = new uint[RATE_WORDS];

    // Decryption holds back one block plus the tag.
    m_bufferSizeDecrypt = RATE_BYTES + TAG_BYTES;
    m_buf = new byte[m_bufferSizeDecrypt];
}
/// <summary>Returns the key length of the configured instance, in bytes.</summary>
public int GetKeyBytesSize() => KEY_BYTES;
/// <summary>Returns the IV (nonce) length of the configured instance, in bytes.</summary>
public int GetIVBytesSize() => RATE_BYTES;
/// <summary>
/// Loads key and nonce and prepares the engine for a new encryption or decryption.
/// </summary>
/// <param name="forEncryption">True to encrypt, false to decrypt.</param>
/// <param name="parameters">
/// Either <c>AeadParameters</c> (key, nonce, optional associated text, MAC size) or
/// <c>ParametersWithIV</c> wrapping a <c>KeyParameter</c>.
/// </param>
/// <exception cref="ArgumentException">
/// The parameter type is unsupported, the key is missing, or the MAC size, key length or IV length
/// does not match the configured instance.
/// </exception>
public void Init(bool forEncryption, ICipherParameters parameters)
{
    KeyParameter key;
    ReadOnlySpan<byte> iv;

    if (parameters is AeadParameters aead) {
        key = aead.Key;
        iv = aead.Nonce;
        initialAssociatedText = aead.GetAssociatedText();

        // MacSize is compared in bits; only the instance's own tag length is accepted.
        int macSizeBits = aead.MacSize;
        if (macSizeBits != TAG_BYTES * 8)
            throw new ArgumentException("Invalid value for MAC size: " + macSizeBits.ToString());
    } else if (parameters is ParametersWithIV withIV) {
        key = withIV.Parameters as KeyParameter;
        iv = withIV.IV;
        initialAssociatedText = null;
    } else {
        throw new ArgumentException("invalid parameters passed to Sparkle");
    }

    if (key == null)
        throw new ArgumentException("Sparkle Init parameters must include a key");

    int expectedKeyBytes = KEY_WORDS * 4;
    if (expectedKeyBytes != key.KeyLength)
        throw new ArgumentException(algorithmName + " requires exactly " + expectedKeyBytes.ToString() + " bytes of key");

    int expectedIVBytes = RATE_WORDS * 4;
    if (expectedIVBytes != iv.Length)
        throw new ArgumentException(algorithmName + " requires exactly " + expectedIVBytes.ToString() + " bytes of IV");

    Pack.LE_To_UInt32(key.Key, k);
    Pack.LE_To_UInt32(iv, npub);

    m_state = forEncryption ? State.EncInit : State.DecInit;

    // Reset loads nonce and key into the state and replays any initial associated text.
    Reset();
}
/// <summary>Adds a single byte of associated data.</summary>
/// <param name="input">The associated-data byte.</param>
/// <exception cref="InvalidOperationException">The engine is not in a state that accepts AAD.</exception>
public void ProcessAadByte(byte input)
{
    CheckAad();

    // A full block is only absorbed once another byte arrives, so the last block always stays
    // buffered for ProcessFinalAad.
    bool blockFull = m_bufPos == RATE_BYTES;
    if (blockFull) {
        ProcessBufferAad(m_buf);
        m_bufPos = 0;
    }

    m_buf[m_bufPos] = input;
    m_bufPos++;
}
/// <summary>Adds <paramref name="len"/> bytes of associated data taken from an array segment.</summary>
/// <param name="inBytes">Source array.</param>
/// <param name="inOff">Offset of the first byte to use.</param>
/// <param name="len">Number of bytes to use.</param>
public void ProcessAadBytes(byte[] inBytes, int inOff, int len)
{
    Check.DataLength(inBytes, inOff, len, "input buffer too short");

    ReadOnlySpan<byte> aad = inBytes.AsSpan(inOff, len);
    ProcessAadBytes(aad);
}
/// <summary>
/// Adds associated data. Full blocks are absorbed immediately; the trailing (possibly full) block
/// stays in the buffer until the associated data is finished.
/// </summary>
/// <param name="input">The associated data; an empty span is ignored.</param>
/// <exception cref="InvalidOperationException">The engine is not in a state that accepts AAD.</exception>
public void ProcessAadBytes(ReadOnlySpan<byte> input)
{
    // Empty input returns before CheckAad, so it does not move the engine into an AAD state.
    if (input.IsEmpty)
        return;

    CheckAad();

    if (m_bufPos > 0) {
        int room = RATE_BYTES - m_bufPos;
        if (input.Length <= room) {
            input.CopyTo(m_buf.AsSpan(m_bufPos));
            m_bufPos += input.Length;
            return;
        }

        // Top up the buffered block and absorb it.
        input.Slice(0, room).CopyTo(m_buf.AsSpan(m_bufPos));
        input = input.Slice(room);
        ProcessBufferAad(m_buf);
    }

    // Strictly greater: exactly one block of input is kept back rather than absorbed.
    for (; input.Length > RATE_BYTES; input = input.Slice(RATE_BYTES)) {
        ProcessBufferAad(input);
    }

    input.CopyTo(m_buf);
    m_bufPos = input.Length;
}
/// <summary>Processes one payload byte, writing any produced output at <paramref name="outOff"/>.</summary>
/// <param name="input">The payload byte.</param>
/// <param name="outBytes">Destination array (passed through Spans.FromNullable).</param>
/// <param name="outOff">Offset in <paramref name="outBytes"/> at which output starts.</param>
/// <returns>The number of bytes written.</returns>
public int ProcessByte(byte input, byte[] outBytes, int outOff)
{
    var output = Spans.FromNullable(outBytes, outOff);
    return ProcessByte(input, output);
}
/// <summary>Processes one payload byte.</summary>
/// <param name="input">The payload byte.</param>
/// <param name="output">Destination for any produced output.</param>
/// <returns>The number of bytes written to <paramref name="output"/>.</returns>
public int ProcessByte(byte input, Span<byte> output)
{
    // A stackalloc assigned directly to a Span<byte> needs no pointer and no unsafe context.
    Span<byte> singleByte = stackalloc byte[1];
    singleByte[0] = input;
    return ProcessBytes(singleByte, output);
}
/// <summary>Processes payload bytes taken from an array segment.</summary>
/// <param name="inBytes">Source array.</param>
/// <param name="inOff">Offset of the first input byte.</param>
/// <param name="len">Number of input bytes.</param>
/// <param name="outBytes">Destination array (passed through Spans.FromNullable).</param>
/// <param name="outOff">Offset in <paramref name="outBytes"/> at which output starts.</param>
/// <returns>The number of bytes written.</returns>
public int ProcessBytes(byte[] inBytes, int inOff, int len, byte[] outBytes, int outOff)
{
    Check.DataLength(inBytes, inOff, len, "input buffer too short");

    var input = inBytes.AsSpan(inOff, len);
    var output = Spans.FromNullable(outBytes, outOff);
    return ProcessBytes(input, output);
}
/// <summary>
/// Encrypts or decrypts payload bytes. Output is produced one rate-sized block at a time; the
/// trailing bytes stay buffered for DoFinal (when decrypting, up to one block plus the tag).
/// </summary>
/// <param name="input">Plaintext (encryption) or ciphertext followed by the tag (decryption).</param>
/// <param name="output">Destination for the produced blocks.</param>
/// <returns>The number of bytes written to <paramref name="output"/>.</returns>
/// <exception cref="InvalidOperationException">The engine is not initialised or cannot be reused.</exception>
public int ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
{
    bool forEncryption = CheckData();
    int written = 0;

    if (forEncryption) {
        if (m_bufPos > 0) {
            int room = RATE_BYTES - m_bufPos;
            if (input.Length <= room) {
                input.CopyTo(m_buf.AsSpan(m_bufPos));
                m_bufPos += input.Length;
                return 0;
            }

            // Complete the buffered block and encrypt it.
            input.Slice(0, room).CopyTo(m_buf.AsSpan(m_bufPos));
            input = input.Slice(room);
            ProcessBufferEncrypt(m_buf, output);
            written = RATE_BYTES;
        }

        // Strictly greater: the last full block is left for DoFinal.
        while (input.Length > RATE_BYTES) {
            ProcessBufferEncrypt(input, output.Slice(written));
            input = input.Slice(RATE_BYTES);
            written += RATE_BYTES;
        }
    } else {
        int room = m_bufferSizeDecrypt - m_bufPos;
        if (input.Length <= room) {
            input.CopyTo(m_buf.AsSpan(m_bufPos));
            m_bufPos += input.Length;
            return 0;
        }

        if (m_bufPos > RATE_BYTES) {
            // The buffer already holds more than one block: decrypt the first block and shift
            // the remainder to the front.
            ProcessBufferDecrypt(m_buf, output);
            m_bufPos -= RATE_BYTES;
            m_buf.AsSpan(0, m_bufPos).CopyFrom(m_buf.AsSpan(RATE_BYTES));
            written = RATE_BYTES;

            room += RATE_BYTES;
            if (input.Length <= room) {
                input.CopyTo(m_buf.AsSpan(m_bufPos));
                m_bufPos += input.Length;
                return written;
            }
        }

        // Fill the buffer up to exactly one block and decrypt it.
        room = RATE_BYTES - m_bufPos;
        input.Slice(0, room).CopyTo(m_buf.AsSpan(m_bufPos));
        input = input.Slice(room);
        ProcessBufferDecrypt(m_buf, output.Slice(written));
        written += RATE_BYTES;

        // Keep at least one block plus the tag unprocessed.
        while (input.Length > m_bufferSizeDecrypt) {
            ProcessBufferDecrypt(input, output.Slice(written));
            input = input.Slice(RATE_BYTES);
            written += RATE_BYTES;
        }
    }

    // Whatever is left becomes the new buffer content.
    input.CopyTo(m_buf);
    m_bufPos = input.Length;
    return written;
}
/// <summary>
/// Finishes the operation: processes the buffered last block, computes the tag and either
/// appends it (encryption) or compares it with the tag at the end of the input (decryption).
/// </summary>
/// <param name="outBytes">Destination array.</param>
/// <param name="outOff">Offset in <paramref name="outBytes"/> at which output starts.</param>
/// <returns>The number of bytes written: remaining data plus tag when encrypting, remaining data when decrypting.</returns>
/// <exception cref="InvalidCipherTextException">
/// Decryption only: fewer than a tag's worth of bytes were buffered, or the tag does not match.
/// </exception>
public int DoFinal(byte[] outBytes, int outOff)
{
    bool forEncryption = CheckData();

    int resultLength;
    if (forEncryption) {
        resultLength = m_bufPos + TAG_BYTES;
    } else {
        if (m_bufPos < TAG_BYTES)
            throw new InvalidCipherTextException("data too short");

        // The last TAG_BYTES of the buffer are the received tag, not ciphertext.
        m_bufPos -= TAG_BYTES;
        resultLength = m_bufPos;
    }

    Check.OutputLength(outBytes, outOff, resultLength, "output buffer too short");

    if (encrypted || m_bufPos > 0) {
        bool partialBlock = m_bufPos < RATE_BYTES;

        // Domain separation for a partial (_M2) or full (_M3) last block.
        state[STATE_WORDS - 1] ^= partialBlock ? _M2 : _M3;

        // The last block is always copied into a word buffer because it may need padding.
        uint[] block = new uint[RATE_WORDS];
        for (int i = 0; i < m_bufPos; ++i) {
            block[i >> 2] |= (uint)m_buf[i] << ((i & 3) << 3);
        }

        if (partialBlock) {
            int wordIndex = m_bufPos >> 2;
            if (!forEncryption) {
                // Decryption fills the unused part of the block with the matching state bytes.
                int bitShift = (m_bufPos & 3) << 3;
                block[wordIndex] |= state[wordIndex] >> bitShift << bitShift;
                int nextWord = wordIndex + 1;
                Array.Copy(state, nextWord, block, nextWord, RATE_WORDS - nextWord);
            }
            // 0x80 padding byte directly after the data.
            block[wordIndex] ^= 0x80U << ((m_bufPos & 3) << 3);
        }

        int half = RATE_WORDS / 2;
        for (int i = 0; i < half; ++i) {
            int j = i + half;
            uint s_i = state[i];
            uint s_j = state[j];

            if (forEncryption) {
                state[i] = s_j ^ block[i] ^ state[RATE_WORDS + i];
                state[j] = s_i ^ s_j ^ block[j] ^ state[RATE_WORDS + (j & CAP_MASK)];
            } else {
                state[i] = s_i ^ s_j ^ block[i] ^ state[RATE_WORDS + i];
                state[j] = s_i ^ block[j] ^ state[RATE_WORDS + (j & CAP_MASK)];
            }

            // block now carries the output words.
            block[i] ^= s_i;
            block[j] ^= s_j;
        }

        for (int i = 0; i < m_bufPos; ++i) {
            outBytes[outOff++] = (byte)(block[i >> 2] >> ((i & 3) << 3));
        }

        SparkleOpt(state, SPARKLE_STEPS_BIG);
    }

    // XOR the key into the capacity part; the tag is read from there.
    for (int i = 0; i < KEY_WORDS; ++i) {
        state[RATE_WORDS + i] ^= k[i];
    }

    tag = new byte[TAG_BYTES];
    Pack.UInt32_To_LE(state, RATE_WORDS, TAG_WORDS, tag, 0);

    if (forEncryption) {
        Array.Copy(tag, 0, outBytes, outOff, TAG_BYTES);
    } else if (!Arrays.FixedTimeEquals(TAG_BYTES, tag, 0, m_buf, m_bufPos)) {
        throw new InvalidCipherTextException("mac check in " + AlgorithmName + " failed");
    }

    // The computed tag stays available through GetMac after encryption only.
    Reset(!forEncryption);
    return resultLength;
}
/// <summary>
/// Span variant of DoFinal: finishes into a temporary array sized by GetOutputSize(0) and copies
/// the result to <paramref name="output"/>.
/// </summary>
/// <param name="output">Destination for the final output.</param>
/// <returns>The number of bytes written.</returns>
public int DoFinal(Span<byte> output)
{
    byte[] scratch = new byte[GetOutputSize(0)];
    int produced = DoFinal(scratch, 0);

    Span<byte> result = scratch.AsSpan(0, produced);
    result.CopyTo(output);
    return produced;
}
/// <summary>
/// Returns the internal tag array (not a copy); null when no tag is available, e.g. after
/// Reset has cleared it.
/// </summary>
public byte[] GetMac() => tag;
/// <summary>
/// Number of bytes a ProcessBytes call with <paramref name="len"/> input bytes would produce in
/// the current state, rounded down to whole blocks.
/// </summary>
/// <param name="len">Input length; negative values are treated as 0.</param>
public int GetUpdateOutputSize(int len)
{
    // The -1 reflects that a block is only processed once more input follows it.
    int total = System.Math.Max(0, len) - 1;

    State current = m_state;
    if (current == State.DecInit || current == State.DecAad) {
        total = System.Math.Max(0, total - TAG_BYTES);
    } else if (current == State.DecData || current == State.DecFinal) {
        total = System.Math.Max(0, total + m_bufPos - TAG_BYTES);
    } else if (current == State.EncData || current == State.EncFinal) {
        total = System.Math.Max(0, total + m_bufPos);
    }

    int remainder = total % RATE_BYTES;
    return total - remainder;
}
/// <summary>
/// Total output size for <paramref name="len"/> further input bytes followed by DoFinal, given
/// the current state and buffered data.
/// </summary>
/// <param name="len">Input length; negative values are treated as 0.</param>
public int GetOutputSize(int len)
{
    int total = System.Math.Max(0, len);
    State current = m_state;

    // Decrypting: the tag is consumed, never emitted.
    if (current == State.DecInit || current == State.DecAad)
        return System.Math.Max(0, total - TAG_BYTES);
    if (current == State.DecData || current == State.DecFinal)
        return System.Math.Max(0, total + m_bufPos - TAG_BYTES);

    // Encrypting with payload already buffered.
    if (current == State.EncData || current == State.EncFinal)
        return total + m_bufPos + TAG_BYTES;

    return total + TAG_BYTES;
}
/// <summary>Resets the engine and discards the stored tag.</summary>
public void Reset() => Reset(true);
// Verifies that associated data may be supplied now and moves an *Init state to the matching
// *Aad state. Any state other than the Init/Aad states throws.
private void CheckAad()
{
    switch (m_state) {
    case State.EncInit:
        m_state = State.EncAad;
        return;
    case State.DecInit:
        m_state = State.DecAad;
        return;
    case State.EncAad:
    case State.DecAad:
        return;
    case State.EncFinal:
        throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption");
    default:
        throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
    }
}
// Verifies that payload may be processed now, finishing the associated data on the first call.
// Returns true when encrypting and false when decrypting.
private bool CheckData()
{
    switch (m_state) {
    case State.EncData:
        return true;
    case State.DecData:
        return false;
    case State.EncInit:
    case State.EncAad:
        FinishAad(State.EncData);
        return true;
    case State.DecInit:
    case State.DecAad:
        FinishAad(State.DecData);
        return false;
    case State.EncFinal:
        throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption");
    default:
        throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
    }
}
// Closes the associated-data phase and switches to nextState. The final AAD block is only
// absorbed when the state shows that associated data was actually supplied.
private void FinishAad(State nextState)
{
    switch (m_state) {
    case State.EncAad:
    case State.DecAad:
        ProcessFinalAad();
        break;
    default:
        break;
    }

    m_bufPos = 0;
    m_state = nextState;
}
// Absorbs one full block of associated data (the first RATE_BYTES of buffer) into the state and
// runs the permutation with the slim step count.
private void ProcessBufferAad(ReadOnlySpan<byte> buffer)
{
    int half = RATE_WORDS / 2;
    for (int i = 0, j = half; i < half; ++i, ++j) {
        uint s_i = state[i];
        uint s_j = state[j];

        uint d_i = Pack.LE_To_UInt32(buffer, i * 4);
        uint d_j = Pack.LE_To_UInt32(buffer, j * 4);

        // The two rate halves are swapped/combined and whitened with capacity words.
        state[i] = s_j ^ d_i ^ state[RATE_WORDS + i];
        state[j] = s_i ^ s_j ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)];
    }

    SparkleOpt(state, SPARKLE_STEPS_SLIM);
}
// Decrypts one full block: writes RATE_BYTES of plaintext to output, updates the state from the
// ciphertext and runs the permutation with the slim step count.
private void ProcessBufferDecrypt(ReadOnlySpan<byte> buffer, Span<byte> output)
{
    Check.OutputLength(output, RATE_BYTES, "output buffer too short");

    int half = RATE_WORDS / 2;
    for (int i = 0, j = half; i < half; ++i, ++j) {
        uint s_i = state[i];
        uint s_j = state[j];

        uint c_i = Pack.LE_To_UInt32(buffer, i * 4);
        uint c_j = Pack.LE_To_UInt32(buffer, j * 4);

        state[i] = s_i ^ s_j ^ c_i ^ state[RATE_WORDS + i];
        state[j] = s_i ^ c_j ^ state[RATE_WORDS + (j & CAP_MASK)];

        // Plaintext word = ciphertext word XOR previous rate word.
        Pack.UInt32_To_LE(c_i ^ s_i, output, i * 4);
        Pack.UInt32_To_LE(c_j ^ s_j, output, j * 4);
    }

    SparkleOpt(state, SPARKLE_STEPS_SLIM);

    // Records that at least one full block was processed (consulted by DoFinal).
    encrypted = true;
}
// Encrypts one full block: writes RATE_BYTES of ciphertext to output, absorbs the plaintext into
// the state and runs the permutation with the slim step count.
private void ProcessBufferEncrypt(ReadOnlySpan<byte> buffer, Span<byte> output)
{
    Check.OutputLength(output, RATE_BYTES, "output buffer too short");

    int half = RATE_WORDS / 2;
    for (int i = 0, j = half; i < half; ++i, ++j) {
        uint s_i = state[i];
        uint s_j = state[j];

        uint p_i = Pack.LE_To_UInt32(buffer, i * 4);
        uint p_j = Pack.LE_To_UInt32(buffer, j * 4);

        state[i] = s_j ^ p_i ^ state[RATE_WORDS + i];
        state[j] = s_i ^ s_j ^ p_j ^ state[RATE_WORDS + (j & CAP_MASK)];

        // Ciphertext word = plaintext word XOR previous rate word.
        Pack.UInt32_To_LE(p_i ^ s_i, output, i * 4);
        Pack.UInt32_To_LE(p_j ^ s_j, output, j * 4);
    }

    SparkleOpt(state, SPARKLE_STEPS_SLIM);

    // Records that at least one full block was processed (consulted by DoFinal).
    encrypted = true;
}
// Absorbs the last buffered block of associated data: applies the _A0 (partial, padded) or _A1
// (full) constant, mixes the block into the state and runs the permutation with the big step count.
private void ProcessFinalAad()
{
    if (m_bufPos < RATE_BYTES) {
        state[STATE_WORDS - 1] ^= _A0;

        // Pad with 0x80 followed by zeros up to a full block.
        m_buf[m_bufPos++] = 0x80;
        while (m_bufPos < RATE_BYTES) {
            m_buf[m_bufPos++] = 0x00;
        }
    } else {
        state[STATE_WORDS - 1] ^= _A1;
    }

    int half = RATE_WORDS / 2;
    for (int i = 0, j = half; i < half; ++i, ++j) {
        uint s_i = state[i];
        uint s_j = state[j];

        uint d_i = Pack.LE_To_UInt32(m_buf, i * 4);
        uint d_j = Pack.LE_To_UInt32(m_buf, j * 4);

        state[i] = s_j ^ d_i ^ state[RATE_WORDS + i];
        state[j] = s_i ^ s_j ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)];
    }

    SparkleOpt(state, SPARKLE_STEPS_BIG);
}
// Clears the buffers and, unless an encryption was in progress, re-initialises the permutation
// state from nonce and key and replays the initial associated text.
// clearMac: when true the stored tag is discarded as well.
private void Reset(bool clearMac)
{
    if (clearMac) {
        tag = null;
    }

    Arrays.Clear(m_buf);
    m_bufPos = 0;
    encrypted = false;

    switch (m_state) {
    case State.EncInit:
    case State.DecInit:
        break;
    case State.DecAad:
    case State.DecData:
    case State.DecFinal:
        m_state = State.DecInit;
        break;
    case State.EncAad:
    case State.EncData:
    case State.EncFinal:
        // After encryption the state is not reloaded; the engine must be re-initialised.
        m_state = State.EncFinal;
        return;
    default:
        throw new InvalidOperationException(AlgorithmName + " needs to be initialized");
    }

    // Nonce goes into the rate part, key into the capacity part.
    Array.Copy(npub, 0, state, 0, RATE_WORDS);
    Array.Copy(k, 0, state, RATE_WORDS, KEY_WORDS);

    SparkleOpt(state, SPARKLE_STEPS_BIG);

    if (initialAssociatedText != null) {
        ProcessAadBytes(initialAssociatedText);
    }
}
// Scalar ARX-box applied in place to one pair of state words (s00, s01) with constant rc.
// It consists of four rounds; each round adds a rotated s01 into s00 (the third round adds s01
// unrotated), XORs a rotated s00 into s01, then XORs rc into s00. The statements are strictly
// order-dependent.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ArxBox(uint rc, ref uint s00, ref uint s01)
{
// Round 1: rotations 31 / 24.
s00 += Integers.RotateRight(s01, 31);
s01 ^= Integers.RotateRight(s00, 24);
s00 ^= rc;
// Round 2: rotations 17 / 17.
s00 += Integers.RotateRight(s01, 17);
s01 ^= Integers.RotateRight(s00, 17);
s00 ^= rc;
// Round 3: no rotation on the addition, rotation 31 on the XOR.
s00 += s01;
s01 ^= Integers.RotateRight(s00, 31);
s00 ^= rc;
// Round 4: rotations 24 / 16.
s00 += Integers.RotateRight(s01, 24);
s01 ^= Integers.RotateRight(s00, 16);
s00 ^= rc;
}
// Scalar linear-layer helper: the input rotated by 16 bits, XORed with its low 16 bits.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint ELL(uint x)
{
    uint rotated = Integers.RotateRight(x, 16);
    uint lowHalf = x & 0xFFFFU;
    return rotated ^ lowHalf;
}
// Dispatches to the permutation matching the state size (8, 12 or 16 words); any other size
// throws InvalidOperationException.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SparkleOpt(uint[] state, int steps)
{
    int words = state.Length;
    if (words == 8) {
        SparkleOpt8(state, steps);
    } else if (words == 12) {
        SparkleOpt12(state, steps);
    } else if (words == 16) {
        SparkleOpt16(state, steps);
    } else {
        throw new InvalidOperationException();
    }
}
// Applies `steps` steps of the permutation to an 8-word state, in place. Each step adds the
// round constant and step counter, runs ArxBox on the four word pairs, then applies the linear
// layer that mixes the first half into the second and swaps the halves.
internal static void SparkleOpt8(uint[] state, int steps)
{
    uint s00 = state[0];
    uint s01 = state[1];
    uint s02 = state[2];
    uint s03 = state[3];
    uint s04 = state[4];
    uint s05 = state[5];
    uint s06 = state[6];
    uint s07 = state[7];

    for (int step = 0; step < steps; ++step) {
        // Round constant and step counter.
        s01 ^= RCON[step & 7];
        s03 ^= (uint)step;

        // ARX-box layer.
        ArxBox(RCON[0], ref s00, ref s01);
        ArxBox(RCON[1], ref s02, ref s03);
        ArxBox(RCON[2], ref s04, ref s05);
        ArxBox(RCON[3], ref s06, ref s07);

        // Linear layer.
        uint t02 = ELL(s00 ^ s02);
        uint t13 = ELL(s01 ^ s03);

        uint u00 = s00 ^ s04;
        uint u01 = s01 ^ s05;
        uint u02 = s02 ^ s06;
        uint u03 = s03 ^ s07;

        s04 = s00;
        s05 = s01;
        s06 = s02;
        s07 = s03;

        s00 = u02 ^ t13;
        s01 = u03 ^ t02;
        s02 = u00 ^ t13;
        s03 = u01 ^ t02;
    }

    state[0] = s00;
    state[1] = s01;
    state[2] = s02;
    state[3] = s03;
    state[4] = s04;
    state[5] = s05;
    state[6] = s06;
    state[7] = s07;
}
// Applies `steps` steps of the permutation to a 12-word state, in place. Each step adds the
// round constant and step counter, runs ArxBox on the six word pairs, then applies the linear
// layer that mixes the first half into the second and swaps the halves.
internal static void SparkleOpt12(uint[] state, int steps)
{
    uint s00 = state[0];
    uint s01 = state[1];
    uint s02 = state[2];
    uint s03 = state[3];
    uint s04 = state[4];
    uint s05 = state[5];
    uint s06 = state[6];
    uint s07 = state[7];
    uint s08 = state[8];
    uint s09 = state[9];
    uint s10 = state[10];
    uint s11 = state[11];

    for (int step = 0; step < steps; ++step) {
        // Round constant and step counter.
        s01 ^= RCON[step & 7];
        s03 ^= (uint)step;

        // ARX-box layer.
        ArxBox(RCON[0], ref s00, ref s01);
        ArxBox(RCON[1], ref s02, ref s03);
        ArxBox(RCON[2], ref s04, ref s05);
        ArxBox(RCON[3], ref s06, ref s07);
        ArxBox(RCON[4], ref s08, ref s09);
        ArxBox(RCON[5], ref s10, ref s11);

        // Linear layer.
        uint t024 = ELL(s00 ^ s02 ^ s04);
        uint t135 = ELL(s01 ^ s03 ^ s05);

        uint u00 = s00 ^ s06;
        uint u01 = s01 ^ s07;
        uint u02 = s02 ^ s08;
        uint u03 = s03 ^ s09;
        uint u04 = s04 ^ s10;
        uint u05 = s05 ^ s11;

        s06 = s00;
        s07 = s01;
        s08 = s02;
        s09 = s03;
        s10 = s04;
        s11 = s05;

        s00 = u02 ^ t135;
        s01 = u03 ^ t024;
        s02 = u04 ^ t135;
        s03 = u05 ^ t024;
        s04 = u00 ^ t135;
        s05 = u01 ^ t024;
    }

    state[0] = s00;
    state[1] = s01;
    state[2] = s02;
    state[3] = s03;
    state[4] = s04;
    state[5] = s05;
    state[6] = s06;
    state[7] = s07;
    state[8] = s08;
    state[9] = s09;
    state[10] = s10;
    state[11] = s11;
}
// Applies `steps` steps of the permutation to a 16-word state, in place.
// Two implementations are provided: an SSE2 path that keeps the state in four 128-bit vectors,
// and a scalar fallback.
// NOTE: the scalar fallback executes two steps per loop iteration, so with an odd `steps` it
// would run one step more than the SSE2 path. The constructor of this class only pairs a
// 16-word state with step counts 8 and 12.
internal static void SparkleOpt16(uint[] state, int steps)
{
if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled) {
// Lane layout: s = words 0,2,4,6; s2 = words 1,3,5,7; s3 = words 8,10,12,14;
// s4 = words 9,11,13,15, so each ArxBox call below processes four word pairs at once.
Vector128<uint> s = Vector128.Create(state[0], state[2], state[4], state[6]);
Vector128<uint> s2 = Vector128.Create(state[1], state[3], state[5], state[7]);
Vector128<uint> s3 = Vector128.Create(state[8], state[10], state[12], state[14]);
Vector128<uint> s4 = Vector128.Create(state[9], state[11], state[13], state[15]);
// Per-lane ArxBox constants: RCON[0..3] for the first half, RCON[4..7] for the second.
Vector128<uint> rc = Load128(RCON.AsSpan(0));
Vector128<uint> rc2 = Load128(RCON.AsSpan(4));
for (int i = 0; i < steps; i++) {
// Round constant into word 1 and step counter into word 3 (lanes 0 and 1 of s2).
s2 = System.Runtime.Intrinsics.X86.Sse2.Xor(s2, Vector128.Create(RCON[i & 7], (uint)i, 0, 0));
ArxBox(rc, ref s, ref s2);
ArxBox(rc2, ref s3, ref s4);
// Linear layer: ELL of the XOR of all even / all odd words of the first half,
// broadcast to every lane by HorizontalXor.
Vector128<uint> left = ELL(HorizontalXor(s));
Vector128<uint> left2 = ELL(HorizontalXor(s2));
Vector128<uint> value = System.Runtime.Intrinsics.X86.Sse2.Xor(s, s3);
Vector128<uint> value2 = System.Runtime.Intrinsics.X86.Sse2.Xor(s2, s4);
s3 = s;
s4 = s2;
// Shuffle control 57 (0x39) rotates the lanes by one: result lane i = source lane (i + 1) & 3.
s = System.Runtime.Intrinsics.X86.Sse2.Xor(left2, System.Runtime.Intrinsics.X86.Sse2.Shuffle(value, 57));
s2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left, System.Runtime.Intrinsics.X86.Sse2.Shuffle(value2, 57));
}
// Interleave even and odd lanes again to restore the natural word order.
Store128(System.Runtime.Intrinsics.X86.Sse2.UnpackLow(s, s2), state.AsSpan(0));
Store128(System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(s, s2), state.AsSpan(4));
Store128(System.Runtime.Intrinsics.X86.Sse2.UnpackLow(s3, s4), state.AsSpan(8));
Store128(System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(s3, s4), state.AsSpan(12));
} else {
// Scalar fallback: s5..s20 hold state words 0..15.
uint s5 = state[0];
uint s6 = state[1];
uint s7 = state[2];
uint s8 = state[3];
uint s9 = state[4];
uint s10 = state[5];
uint s11 = state[6];
uint s12 = state[7];
uint s13 = state[8];
uint s14 = state[9];
uint s15 = state[10];
uint s16 = state[11];
uint s17 = state[12];
uint s18 = state[13];
uint s19 = state[14];
uint s20 = state[15];
int num = 0;
while (num < steps) {
// First step of the pair: s5..s12 act as the first half of the state.
s6 ^= RCON[num & 7];
s8 = (uint)((int)s8 ^ num++);
ArxBox(RCON[0], ref s5, ref s6);
ArxBox(RCON[1], ref s7, ref s8);
ArxBox(RCON[2], ref s9, ref s10);
ArxBox(RCON[3], ref s11, ref s12);
ArxBox(RCON[4], ref s13, ref s14);
ArxBox(RCON[5], ref s15, ref s16);
ArxBox(RCON[6], ref s17, ref s18);
ArxBox(RCON[7], ref s19, ref s20);
uint num3 = ELL(s5 ^ s7 ^ s9 ^ s11);
uint num4 = ELL(s6 ^ s8 ^ s10 ^ s12);
uint num5 = s13;
uint num6 = s14;
// The new first half is written into s13..s20 while s5..s12 keep their values, so the
// halves swap roles instead of being copied.
s13 = (s7 ^ s15 ^ num4);
s14 = (s8 ^ s16 ^ num3);
s15 = (s9 ^ s17 ^ num4);
s16 = (s10 ^ s18 ^ num3);
s17 = (s11 ^ s19 ^ num4);
s18 = (s12 ^ s20 ^ num3);
s19 = (s5 ^ num5 ^ num4);
s20 = (s6 ^ num6 ^ num3);
// Second step of the pair: s13..s20 now act as the first half, so the ArxBox constants
// RCON[0..3] go to them and RCON[4..7] to s5..s12.
s14 ^= RCON[num & 7];
s16 = (uint)((int)s16 ^ num++);
ArxBox(RCON[0], ref s13, ref s14);
ArxBox(RCON[1], ref s15, ref s16);
ArxBox(RCON[2], ref s17, ref s18);
ArxBox(RCON[3], ref s19, ref s20);
ArxBox(RCON[4], ref s5, ref s6);
ArxBox(RCON[5], ref s7, ref s8);
ArxBox(RCON[6], ref s9, ref s10);
ArxBox(RCON[7], ref s11, ref s12);
uint num8 = ELL(s13 ^ s15 ^ s17 ^ s19);
uint num9 = ELL(s14 ^ s16 ^ s18 ^ s20);
uint num10 = s5;
uint num11 = s6;
// Roles swap back: s5..s12 are the first half again after this step.
s5 = (s7 ^ s15 ^ num9);
s6 = (s8 ^ s16 ^ num8);
s7 = (s9 ^ s17 ^ num9);
s8 = (s10 ^ s18 ^ num8);
s9 = (s11 ^ s19 ^ num9);
s10 = (s12 ^ s20 ^ num8);
s11 = (num10 ^ s13 ^ num9);
s12 = (num11 ^ s14 ^ num8);
}
state[0] = s5;
state[1] = s6;
state[2] = s7;
state[3] = s8;
state[4] = s9;
state[5] = s10;
state[6] = s11;
state[7] = s12;
state[8] = s13;
state[9] = s14;
state[10] = s15;
state[11] = s16;
state[12] = s17;
state[13] = s18;
state[14] = s19;
state[15] = s20;
}
}
// SSE2 ARX-box: applies the same four rounds as the scalar ArxBox to four (s00, s01) lane pairs
// at once, with a per-lane constant rc.
// Each 32-bit rotation is expressed as a right shift plus the complementary left shift. For the
// additions the two shifted parts are added one after the other; they occupy disjoint bits, so
// their sum equals the rotated value.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ArxBox(Vector128<uint> rc, ref Vector128<uint> s00, ref Vector128<uint> s01)
{
// Round 1: s00 += rotr(s01, 31); s01 ^= rotr(s00, 24); s00 ^= rc.
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s01, 31));
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s01, 1));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s00, 24));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s00, 8));
s00 = System.Runtime.Intrinsics.X86.Sse2.Xor(s00, rc);
// Round 2: s00 += rotr(s01, 17); s01 ^= rotr(s00, 17); s00 ^= rc.
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s01, 17));
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s01, 15));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s00, 17));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s00, 15));
s00 = System.Runtime.Intrinsics.X86.Sse2.Xor(s00, rc);
// Round 3: s00 += s01; s01 ^= rotr(s00, 31); s00 ^= rc.
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, s01);
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s00, 31));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s00, 1));
s00 = System.Runtime.Intrinsics.X86.Sse2.Xor(s00, rc);
// Round 4: s00 += rotr(s01, 24); s01 ^= rotr(s00, 16); s00 ^= rc.
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s01, 24));
s00 = System.Runtime.Intrinsics.X86.Sse2.Add(s00, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s01, 8));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(s00, 16));
s01 = System.Runtime.Intrinsics.X86.Sse2.Xor(s01, System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(s00, 16));
s00 = System.Runtime.Intrinsics.X86.Sse2.Xor(s00, rc);
}
// SSE2 version of the scalar ELL, applied to each 32-bit lane:
// (x << 16) ^ ((x ^ (x << 16)) >> 16) == rotr(x, 16) ^ (x & 0xFFFF).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<uint> ELL(Vector128<uint> x)
{
    Vector128<uint> high = System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(x, 16);
    Vector128<uint> folded = System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(
        System.Runtime.Intrinsics.X86.Sse2.Xor(x, high), 16);
    return System.Runtime.Intrinsics.X86.Sse2.Xor(high, folded);
}
// Returns a vector in which every lane holds the XOR of all four input lanes.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<uint> HorizontalXor(Vector128<uint> x)
{
    // Control 27 (0x1B) reverses the lane order: lanes become (x0^x3, x1^x2, x1^x2, x0^x3).
    Vector128<uint> reversed = System.Runtime.Intrinsics.X86.Sse2.Shuffle(x, 27);
    Vector128<uint> pairs = System.Runtime.Intrinsics.X86.Sse2.Xor(x, reversed);

    // Control 177 (0xB1) swaps neighbouring lanes, completing the four-way XOR in every lane.
    Vector128<uint> swapped = System.Runtime.Intrinsics.X86.Sse2.Shuffle(pairs, 177);
    return System.Runtime.Intrinsics.X86.Sse2.Xor(pairs, swapped);
}
// Loads the first four words of t into a vector: a direct memory read when
// Vector.IsPackedLittleEndian is true, otherwise an element-by-element construction.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<uint> Load128(ReadOnlySpan<uint> t)
{
    if (!Vector.IsPackedLittleEndian)
        return Vector128.Create(t[0], t[1], t[2], t[3]);

    ReadOnlySpan<byte> raw = MemoryMarshal.AsBytes(t);
    return MemoryMarshal.Read<Vector128<uint>>(raw);
}
// Stores the 128-bit vector s into the memory of t: a direct write when
// Vector.IsPackedLittleEndian is true, otherwise two 64-bit little-endian writes.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Store128(Vector128<uint> s, Span<uint> t)
{
    Span<byte> raw = MemoryMarshal.AsBytes(t);
    if (Vector.IsPackedLittleEndian) {
        MemoryMarshal.Write(raw, ref s);
        return;
    }

    Vector128<ulong> halves = s.AsUInt64();
    BinaryPrimitives.WriteUInt64LittleEndian(raw.Slice(0, 8), halves.GetElement(0));
    BinaryPrimitives.WriteUInt64LittleEndian(raw.Slice(8), halves.GetElement(1));
}
}
}