ChaChaEngine
Implementation of Daniel J. Bernstein's ChaCha stream cipher.
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Crypto.Engines
{
public class ChaChaEngine : Salsa20Engine
{
/// <summary>
/// Gets the algorithm name: the literal "ChaCha" followed by the configured round count.
/// </summary>
public override string AlgorithmName
{
    get { return "ChaCha" + rounds; }
}
/// <summary>
/// Creates an engine through the base class's parameterless constructor.
/// </summary>
/// <remarks>
/// The round count is whatever that base constructor selects; it is not visible in this file
/// (presumably the standard 20 rounds - TODO confirm against Salsa20Engine).
/// </remarks>
public ChaChaEngine()
    : base()
{
}
/// <summary>
/// Creates an engine with an explicit round count, forwarded unchanged to the base class.
/// </summary>
/// <param name="rounds">
/// Number of rounds to run per keystream block. Any validation is left to the base constructor.
/// </param>
public ChaChaEngine(int rounds) : base(rounds)
{
}
/// <summary>
/// Steps the block counter held in state words 12 (low) and 13 (high) by one.
/// </summary>
protected override void AdvanceCounter()
{
    engineState[12]++;

    // A low word that has wrapped to zero carries into the high word.
    if (engineState[12] == 0)
    {
        engineState[13]++;
    }
}
/// <summary>
/// Clears the block counter by zeroing state words 12 and 13.
/// </summary>
protected override void ResetCounter()
{
    engineState[13] = 0;
    engineState[12] = 0;
}
/// <summary>
/// Loads key material (when supplied) and the IV into the engine state.
/// </summary>
/// <param name="keyBytes">
/// A 16- or 32-byte key, or <c>null</c> to leave the key-related state words untouched.
/// </param>
/// <param name="ivBytes">
/// IV bytes; two words are unpacked from offset 0 into state words 14 and 15.
/// This method does not validate it - presumably the caller does (TODO confirm).
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="keyBytes"/> is non-null and is neither 16 nor 32 bytes long.
/// </exception>
protected override void SetKey(byte[] keyBytes, byte[] ivBytes)
{
    if (keyBytes != null)
    {
        int keyLength = keyBytes.Length;
        bool supportedLength = keyLength == 16 || keyLength == 32;
        if (!supportedLength)
        {
            throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key");
        }

        // Words 0..3: constant chosen by key length (written by PackTauOrSigma).
        Salsa20Engine.PackTauOrSigma(keyLength, engineState, 0);

        // Words 4..7 come from the first 16 key bytes.
        Pack.LE_To_UInt32(keyBytes, 0, engineState, 4, 4);

        // Words 8..11 come from the last 16 key bytes; for a 16-byte key the offset is 0,
        // so the same bytes are used again.
        Pack.LE_To_UInt32(keyBytes, keyLength - 16, engineState, 8, 4);
    }

    // Words 14..15 come from the IV. Words 12..13 (the counter) are not touched here.
    Pack.LE_To_UInt32(ivBytes, 0, engineState, 14, 2);
}
/// <summary>
/// Produces one keystream block from the current engine state by delegating to
/// <see cref="ChachaCore"/> with the configured round count.
/// </summary>
/// <param name="output">Buffer that receives the keystream block.</param>
protected override void GenerateKeyStream(byte[] output) =>
    ChachaCore(rounds, engineState, output);
/// <summary>
/// Computes one 64-byte ChaCha keystream block from a 16-word state.
/// </summary>
/// <remarks>
/// A working copy of <paramref name="input"/> is mixed by the round loop, the original words are
/// then added back in, and the 16 resulting words are written to <paramref name="output"/>.
/// <paramref name="input"/> itself is only read.
/// Two implementations with the same structure are provided: an SSE2 path that keeps each group
/// of four state words in one 128-bit vector, and a scalar fallback.
/// The loop counter drops by two per iteration (each iteration performs two rounds), so an odd
/// <paramref name="rounds"/> runs as the next even value and a non-positive value skips the
/// mixing entirely. No argument validation is performed here.
/// </remarks>
/// <param name="rounds">Number of rounds; consumed two at a time.</param>
/// <param name="input">State words; indices 0..15 are read.</param>
/// <param name="output">Destination buffer; bytes 0..63 are written.</param>
internal static void ChachaCore(int rounds, uint[] input, byte[] output)
{
// Vectorised path, taken when the project's Sse2 wrapper reports it is enabled.
if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled) {
// vector..vector4 hold the original rows (words 0-3, 4-7, 8-11, 12-15) for the final add.
Vector128<uint> vector = Load128_UInt32(input.AsSpan());
Vector128<uint> vector2 = Load128_UInt32(input.AsSpan(4));
Vector128<uint> vector3 = Load128_UInt32(input.AsSpan(8));
Vector128<uint> vector4 = Load128_UInt32(input.AsSpan(12));
// Working rows: vector5 = words 0-3, vector6 = words 4-7, left = words 8-11, left2 = words 12-15.
Vector128<uint> vector5 = vector;
Vector128<uint> vector6 = vector2;
Vector128<uint> left = vector3;
Vector128<uint> left2 = vector4;
for (int num = rounds; num > 0; num -= 2) {
// First round of the pair: the same lane of every row is mixed together (four columns at once).
// Xor(ShiftLeftLogical(x, n), ShiftRightLogical(x, 32 - n)) rotates each 32-bit lane left by n;
// the rotation amounts are 16, 12, 8 and 7.
vector5 = System.Runtime.Intrinsics.X86.Sse2.Add(vector5, vector6);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left2, vector5);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(left2, 16), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(left2, 16));
left = System.Runtime.Intrinsics.X86.Sse2.Add(left, left2);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(vector6, left);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(vector6, 12), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(vector6, 20));
vector5 = System.Runtime.Intrinsics.X86.Sse2.Add(vector5, vector6);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left2, vector5);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(left2, 8), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(left2, 24));
left = System.Runtime.Intrinsics.X86.Sse2.Add(left, left2);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(vector6, left);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(vector6, 7), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(vector6, 25));
// Rotate the lanes of rows 1..3 by 1, 2 and 3 positions so the next round mixes diagonals.
// Shuffle control 57 selects lanes (1,2,3,0), 78 selects (2,3,0,1), 147 selects (3,0,1,2).
vector6 = System.Runtime.Intrinsics.X86.Sse2.Shuffle(vector6, 57);
left = System.Runtime.Intrinsics.X86.Sse2.Shuffle(left, 78);
left2 = System.Runtime.Intrinsics.X86.Sse2.Shuffle(left2, 147);
// Second round of the pair: the same sequence, now acting on the diagonals.
vector5 = System.Runtime.Intrinsics.X86.Sse2.Add(vector5, vector6);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left2, vector5);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(left2, 16), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(left2, 16));
left = System.Runtime.Intrinsics.X86.Sse2.Add(left, left2);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(vector6, left);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(vector6, 12), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(vector6, 20));
vector5 = System.Runtime.Intrinsics.X86.Sse2.Add(vector5, vector6);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(left2, vector5);
left2 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(left2, 8), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(left2, 24));
left = System.Runtime.Intrinsics.X86.Sse2.Add(left, left2);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(vector6, left);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Xor(System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(vector6, 7), System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(vector6, 25));
// Undo the lane rotation (147 and 57 swapped relative to above) to restore column order.
vector6 = System.Runtime.Intrinsics.X86.Sse2.Shuffle(vector6, 147);
left = System.Runtime.Intrinsics.X86.Sse2.Shuffle(left, 78);
left2 = System.Runtime.Intrinsics.X86.Sse2.Shuffle(left2, 57);
}
// Add the original rows back in, then store the four rows at byte offsets 0, 16, 32 and 48.
vector5 = System.Runtime.Intrinsics.X86.Sse2.Add(vector5, vector);
vector6 = System.Runtime.Intrinsics.X86.Sse2.Add(vector6, vector2);
left = System.Runtime.Intrinsics.X86.Sse2.Add(left, vector3);
left2 = System.Runtime.Intrinsics.X86.Sse2.Add(left2, vector4);
Store128_UInt32(vector5, output.AsSpan());
Store128_UInt32(vector6, output.AsSpan(16));
Store128_UInt32(left, output.AsSpan(32));
Store128_UInt32(left2, output.AsSpan(48));
} else {
// Scalar fallback: num2..num17 are the working copies of state words 0..15.
uint num2 = input[0];
uint num3 = input[1];
uint num4 = input[2];
uint num5 = input[3];
uint num6 = input[4];
uint num7 = input[5];
uint num8 = input[6];
uint num9 = input[7];
uint num10 = input[8];
uint num11 = input[9];
uint num12 = input[10];
uint num13 = input[11];
uint num14 = input[12];
uint num15 = input[13];
uint num16 = input[14];
uint num17 = input[15];
for (int num18 = rounds; num18 > 0; num18 -= 2) {
// Column round over word groups (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15),
// with rotation amounts 16, 12, 8 and 7.
num2 += num6;
num14 = Integers.RotateLeft(num14 ^ num2, 16);
num3 += num7;
num15 = Integers.RotateLeft(num15 ^ num3, 16);
num4 += num8;
num16 = Integers.RotateLeft(num16 ^ num4, 16);
num5 += num9;
num17 = Integers.RotateLeft(num17 ^ num5, 16);
num10 += num14;
num6 = Integers.RotateLeft(num6 ^ num10, 12);
num11 += num15;
num7 = Integers.RotateLeft(num7 ^ num11, 12);
num12 += num16;
num8 = Integers.RotateLeft(num8 ^ num12, 12);
num13 += num17;
num9 = Integers.RotateLeft(num9 ^ num13, 12);
num2 += num6;
num14 = Integers.RotateLeft(num14 ^ num2, 8);
num3 += num7;
num15 = Integers.RotateLeft(num15 ^ num3, 8);
num4 += num8;
num16 = Integers.RotateLeft(num16 ^ num4, 8);
num5 += num9;
num17 = Integers.RotateLeft(num17 ^ num5, 8);
num10 += num14;
num6 = Integers.RotateLeft(num6 ^ num10, 7);
num11 += num15;
num7 = Integers.RotateLeft(num7 ^ num11, 7);
num12 += num16;
num8 = Integers.RotateLeft(num8 ^ num12, 7);
num13 += num17;
num9 = Integers.RotateLeft(num9 ^ num13, 7);
// Diagonal round over word groups (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14),
// using the same rotation amounts.
num2 += num7;
num17 = Integers.RotateLeft(num17 ^ num2, 16);
num3 += num8;
num14 = Integers.RotateLeft(num14 ^ num3, 16);
num4 += num9;
num15 = Integers.RotateLeft(num15 ^ num4, 16);
num5 += num6;
num16 = Integers.RotateLeft(num16 ^ num5, 16);
num12 += num17;
num7 = Integers.RotateLeft(num7 ^ num12, 12);
num13 += num14;
num8 = Integers.RotateLeft(num8 ^ num13, 12);
num10 += num15;
num9 = Integers.RotateLeft(num9 ^ num10, 12);
num11 += num16;
num6 = Integers.RotateLeft(num6 ^ num11, 12);
num2 += num7;
num17 = Integers.RotateLeft(num17 ^ num2, 8);
num3 += num8;
num14 = Integers.RotateLeft(num14 ^ num3, 8);
num4 += num9;
num15 = Integers.RotateLeft(num15 ^ num4, 8);
num5 += num6;
num16 = Integers.RotateLeft(num16 ^ num5, 8);
num12 += num17;
num7 = Integers.RotateLeft(num7 ^ num12, 7);
num13 += num14;
num8 = Integers.RotateLeft(num8 ^ num13, 7);
num10 += num15;
num9 = Integers.RotateLeft(num9 ^ num10, 7);
num11 += num16;
num6 = Integers.RotateLeft(num6 ^ num11, 7);
}
// Feed-forward: add each original input word and serialise it at byte offset 4 * index.
Pack.UInt32_To_LE(num2 + input[0], output, 0);
Pack.UInt32_To_LE(num3 + input[1], output, 4);
Pack.UInt32_To_LE(num4 + input[2], output, 8);
Pack.UInt32_To_LE(num5 + input[3], output, 12);
Pack.UInt32_To_LE(num6 + input[4], output, 16);
Pack.UInt32_To_LE(num7 + input[5], output, 20);
Pack.UInt32_To_LE(num8 + input[6], output, 24);
Pack.UInt32_To_LE(num9 + input[7], output, 28);
Pack.UInt32_To_LE(num10 + input[8], output, 32);
Pack.UInt32_To_LE(num11 + input[9], output, 36);
Pack.UInt32_To_LE(num12 + input[10], output, 40);
Pack.UInt32_To_LE(num13 + input[11], output, 44);
Pack.UInt32_To_LE(num14 + input[12], output, 48);
Pack.UInt32_To_LE(num15 + input[13], output, 52);
Pack.UInt32_To_LE(num16 + input[14], output, 56);
Pack.UInt32_To_LE(num17 + input[15], output, 60);
}
}
/// <summary>
/// Builds a 128-bit vector from the first four 32-bit words of <paramref name="t"/>.
/// </summary>
/// <param name="t">Source words; at least four elements are required.</param>
/// <returns>A vector whose lanes 0..3 are <c>t[0]</c>..<c>t[3]</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<uint> Load128_UInt32(ReadOnlySpan<uint> t)
{
    if (!Vector.IsPackedLittleEndian)
    {
        // Portable path: assemble the vector lane by lane.
        return Vector128.Create(t[0], t[1], t[2], t[3]);
    }

    // Fast path: reinterpret the backing memory and read all 16 bytes in one go.
    ReadOnlySpan<byte> raw = MemoryMarshal.AsBytes(t);
    return MemoryMarshal.Read<Vector128<uint>>(raw);
}
/// <summary>
/// Writes the 16 bytes of <paramref name="s"/> to the start of <paramref name="t"/>.
/// </summary>
/// <param name="s">Vector to store.</param>
/// <param name="t">Destination; at least 16 bytes are required.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Store128_UInt32(Vector128<uint> s, Span<byte> t)
{
    if (Vector.IsPackedLittleEndian)
    {
        // Fast path: copy the vector's in-memory representation directly.
        MemoryMarshal.Write(t, ref s);
        return;
    }

    // Portable path: emit the two 64-bit halves explicitly in little-endian order.
    Vector128<ulong> halves = s.AsUInt64();
    BinaryPrimitives.WriteUInt64LittleEndian(t.Slice(0, 8), halves.GetElement(0));
    BinaryPrimitives.WriteUInt64LittleEndian(t.Slice(8), halves.GetElement(1));
}
}
}