<PackageReference Include="BouncyCastle.Cryptography" Version="2.3.0" />

ChaChaEngine

public class ChaChaEngine : Salsa20Engine
Implementation of Daniel J. Bernstein's ChaCha stream cipher.
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

// Two imported namespaces both declare a type called Sse2. The alias pins the short
// name to the hardware intrinsics class; the BouncyCastle capability check further
// down stays fully qualified.
using Sse2 = System.Runtime.Intrinsics.X86.Sse2;

namespace Org.BouncyCastle.Crypto.Engines
{
    /// <summary>
    /// Implementation of Daniel J. Bernstein's ChaCha stream cipher.
    /// </summary>
    /// <remarks>
    /// As used by this class, the inherited 16-word <c>engineState</c> is laid out as:
    /// words 0-3 filled by <c>PackTauOrSigma</c>, words 4-11 key material,
    /// words 12-13 the block counter (low word first), and words 14-15 the IV.
    /// </remarks>
    public class ChaChaEngine : Salsa20Engine
    {
        /// <summary>The literal "ChaCha" followed by the configured round count.</summary>
        public override string AlgorithmName => "ChaCha" + rounds.ToString();

        /// <summary>Creates an engine using whatever round count the base class's parameterless constructor selects.</summary>
        public ChaChaEngine()
        {
        }

        /// <summary>Creates an engine with an explicit round count, forwarded unchanged to the base class.</summary>
        /// <param name="rounds">Number of rounds to run per key-stream block.</param>
        public ChaChaEngine(int rounds)
            : base(rounds)
        {
        }

        /// <summary>
        /// Increments the counter held in state words 12 (low) and 13 (high),
        /// carrying into the high word when the low word wraps to zero.
        /// </summary>
        protected override void AdvanceCounter()
        {
            engineState[12]++;
            if (engineState[12] == 0)
            {
                engineState[13]++;
            }
        }

        /// <summary>Zeroes both counter words (state words 12 and 13).</summary>
        protected override void ResetCounter()
        {
            engineState[13] = 0;
            engineState[12] = 0;
        }

        /// <summary>
        /// Loads key and IV material into the engine state.
        /// </summary>
        /// <param name="keyBytes">
        /// A 16- or 32-byte key, or <c>null</c> to leave the constant and key words untouched.
        /// </param>
        /// <param name="ivBytes">IV bytes; packed into state words starting at index 14.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="keyBytes"/> is non-null and is neither 16 nor 32 bytes long.
        /// </exception>
        protected override void SetKey(byte[] keyBytes, byte[] ivBytes)
        {
            if (keyBytes != null)
            {
                int keyLength = keyBytes.Length;
                bool supportedLength = keyLength == 16 || keyLength == 32;
                if (!supportedLength)
                {
                    throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key");
                }

                PackTauOrSigma(keyLength, engineState, 0);

                // First half of the key goes to words 4-7. The second load starts at
                // (length - 16), so a 16-byte key is reused for words 8-11 while a
                // 32-byte key supplies its upper half.
                Pack.LE_To_UInt32(keyBytes, 0, engineState, 4, 4);
                Pack.LE_To_UInt32(keyBytes, keyLength - 16, engineState, 8, 4);
            }

            // The IV is (re)loaded on every call, whether or not a key was supplied.
            Pack.LE_To_UInt32(ivBytes, 0, engineState, 14, 2);
        }

        /// <summary>Produces one key-stream block from the current engine state.</summary>
        /// <param name="output">Buffer that receives the block, written from offset 0.</param>
        protected override void GenerateKeyStream(byte[] output) => ChachaCore(rounds, engineState, output);

        /// <summary>
        /// Runs the ChaCha block function: applies <paramref name="rounds"/> rounds
        /// (processed two at a time) to a working copy of <paramref name="input"/>,
        /// adds the original input words back in, and writes the 16 resulting words
        /// to <paramref name="output"/> as little-endian bytes.
        /// </summary>
        /// <param name="rounds">Round count; the loop consumes two rounds per iteration.</param>
        /// <param name="input">State words; indices 0-15 are read and never modified.</param>
        /// <param name="output">Destination buffer; bytes 0-63 are written.</param>
        internal static void ChachaCore(int rounds, uint[] input, byte[] output)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled)
            {
                // Vectorised path: each 128-bit register holds one row of the 4x4 state.
                Vector128<uint> init0 = Load128_UInt32(input.AsSpan());
                Vector128<uint> init1 = Load128_UInt32(input.AsSpan(4));
                Vector128<uint> init2 = Load128_UInt32(input.AsSpan(8));
                Vector128<uint> init3 = Load128_UInt32(input.AsSpan(12));

                Vector128<uint> row0 = init0;
                Vector128<uint> row1 = init1;
                Vector128<uint> row2 = init2;
                Vector128<uint> row3 = init3;

                for (int remaining = rounds; remaining > 0; remaining -= 2)
                {
                    // Column round. Xor of (x << n) and (x >> (32 - n)) is a rotate-left by n.
                    row0 = Sse2.Add(row0, row1);
                    row3 = Sse2.Xor(row3, row0);
                    row3 = Sse2.Xor(Sse2.ShiftLeftLogical(row3, 16), Sse2.ShiftRightLogical(row3, 16));

                    row2 = Sse2.Add(row2, row3);
                    row1 = Sse2.Xor(row1, row2);
                    row1 = Sse2.Xor(Sse2.ShiftLeftLogical(row1, 12), Sse2.ShiftRightLogical(row1, 20));

                    row0 = Sse2.Add(row0, row1);
                    row3 = Sse2.Xor(row3, row0);
                    row3 = Sse2.Xor(Sse2.ShiftLeftLogical(row3, 8), Sse2.ShiftRightLogical(row3, 24));

                    row2 = Sse2.Add(row2, row3);
                    row1 = Sse2.Xor(row1, row2);
                    row1 = Sse2.Xor(Sse2.ShiftLeftLogical(row1, 7), Sse2.ShiftRightLogical(row1, 25));

                    // Rotate the lanes of rows 1, 2, 3 by one, two and three positions so
                    // that the diagonals of the state line up as columns.
                    row1 = Sse2.Shuffle(row1, 0x39);
                    row2 = Sse2.Shuffle(row2, 0x4E);
                    row3 = Sse2.Shuffle(row3, 0x93);

                    // Diagonal round: same arithmetic as above on the realigned lanes.
                    row0 = Sse2.Add(row0, row1);
                    row3 = Sse2.Xor(row3, row0);
                    row3 = Sse2.Xor(Sse2.ShiftLeftLogical(row3, 16), Sse2.ShiftRightLogical(row3, 16));

                    row2 = Sse2.Add(row2, row3);
                    row1 = Sse2.Xor(row1, row2);
                    row1 = Sse2.Xor(Sse2.ShiftLeftLogical(row1, 12), Sse2.ShiftRightLogical(row1, 20));

                    row0 = Sse2.Add(row0, row1);
                    row3 = Sse2.Xor(row3, row0);
                    row3 = Sse2.Xor(Sse2.ShiftLeftLogical(row3, 8), Sse2.ShiftRightLogical(row3, 24));

                    row2 = Sse2.Add(row2, row3);
                    row1 = Sse2.Xor(row1, row2);
                    row1 = Sse2.Xor(Sse2.ShiftLeftLogical(row1, 7), Sse2.ShiftRightLogical(row1, 25));

                    // Undo the lane rotation so the rows are back in column order.
                    row1 = Sse2.Shuffle(row1, 0x93);
                    row2 = Sse2.Shuffle(row2, 0x4E);
                    row3 = Sse2.Shuffle(row3, 0x39);
                }

                // Feed-forward: add the untouched input rows, then serialise each row.
                Store128_UInt32(Sse2.Add(row0, init0), output.AsSpan());
                Store128_UInt32(Sse2.Add(row1, init1), output.AsSpan(16));
                Store128_UInt32(Sse2.Add(row2, init2), output.AsSpan(32));
                Store128_UInt32(Sse2.Add(row3, init3), output.AsSpan(48));
                return;
            }

            // Scalar path: one local per state word, named by its index in the state.
            uint x00 = input[0];
            uint x01 = input[1];
            uint x02 = input[2];
            uint x03 = input[3];
            uint x04 = input[4];
            uint x05 = input[5];
            uint x06 = input[6];
            uint x07 = input[7];
            uint x08 = input[8];
            uint x09 = input[9];
            uint x10 = input[10];
            uint x11 = input[11];
            uint x12 = input[12];
            uint x13 = input[13];
            uint x14 = input[14];
            uint x15 = input[15];

            for (int remaining = rounds; remaining > 0; remaining -= 2)
            {
                // Column round over (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15).
                x00 += x04; x12 = Integers.RotateLeft(x12 ^ x00, 16);
                x01 += x05; x13 = Integers.RotateLeft(x13 ^ x01, 16);
                x02 += x06; x14 = Integers.RotateLeft(x14 ^ x02, 16);
                x03 += x07; x15 = Integers.RotateLeft(x15 ^ x03, 16);

                x08 += x12; x04 = Integers.RotateLeft(x04 ^ x08, 12);
                x09 += x13; x05 = Integers.RotateLeft(x05 ^ x09, 12);
                x10 += x14; x06 = Integers.RotateLeft(x06 ^ x10, 12);
                x11 += x15; x07 = Integers.RotateLeft(x07 ^ x11, 12);

                x00 += x04; x12 = Integers.RotateLeft(x12 ^ x00, 8);
                x01 += x05; x13 = Integers.RotateLeft(x13 ^ x01, 8);
                x02 += x06; x14 = Integers.RotateLeft(x14 ^ x02, 8);
                x03 += x07; x15 = Integers.RotateLeft(x15 ^ x03, 8);

                x08 += x12; x04 = Integers.RotateLeft(x04 ^ x08, 7);
                x09 += x13; x05 = Integers.RotateLeft(x05 ^ x09, 7);
                x10 += x14; x06 = Integers.RotateLeft(x06 ^ x10, 7);
                x11 += x15; x07 = Integers.RotateLeft(x07 ^ x11, 7);

                // Diagonal round over (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
                x00 += x05; x15 = Integers.RotateLeft(x15 ^ x00, 16);
                x01 += x06; x12 = Integers.RotateLeft(x12 ^ x01, 16);
                x02 += x07; x13 = Integers.RotateLeft(x13 ^ x02, 16);
                x03 += x04; x14 = Integers.RotateLeft(x14 ^ x03, 16);

                x10 += x15; x05 = Integers.RotateLeft(x05 ^ x10, 12);
                x11 += x12; x06 = Integers.RotateLeft(x06 ^ x11, 12);
                x08 += x13; x07 = Integers.RotateLeft(x07 ^ x08, 12);
                x09 += x14; x04 = Integers.RotateLeft(x04 ^ x09, 12);

                x00 += x05; x15 = Integers.RotateLeft(x15 ^ x00, 8);
                x01 += x06; x12 = Integers.RotateLeft(x12 ^ x01, 8);
                x02 += x07; x13 = Integers.RotateLeft(x13 ^ x02, 8);
                x03 += x04; x14 = Integers.RotateLeft(x14 ^ x03, 8);

                x10 += x15; x05 = Integers.RotateLeft(x05 ^ x10, 7);
                x11 += x12; x06 = Integers.RotateLeft(x06 ^ x11, 7);
                x08 += x13; x07 = Integers.RotateLeft(x07 ^ x08, 7);
                x09 += x14; x04 = Integers.RotateLeft(x04 ^ x09, 7);
            }

            // Feed-forward and little-endian serialisation, one word at a time.
            Pack.UInt32_To_LE(x00 + input[0], output, 0);
            Pack.UInt32_To_LE(x01 + input[1], output, 4);
            Pack.UInt32_To_LE(x02 + input[2], output, 8);
            Pack.UInt32_To_LE(x03 + input[3], output, 12);
            Pack.UInt32_To_LE(x04 + input[4], output, 16);
            Pack.UInt32_To_LE(x05 + input[5], output, 20);
            Pack.UInt32_To_LE(x06 + input[6], output, 24);
            Pack.UInt32_To_LE(x07 + input[7], output, 28);
            Pack.UInt32_To_LE(x08 + input[8], output, 32);
            Pack.UInt32_To_LE(x09 + input[9], output, 36);
            Pack.UInt32_To_LE(x10 + input[10], output, 40);
            Pack.UInt32_To_LE(x11 + input[11], output, 44);
            Pack.UInt32_To_LE(x12 + input[12], output, 48);
            Pack.UInt32_To_LE(x13 + input[13], output, 52);
            Pack.UInt32_To_LE(x14 + input[14], output, 56);
            Pack.UInt32_To_LE(x15 + input[15], output, 60);
        }

        /// <summary>
        /// Reads the first four words of <paramref name="t"/> into a vector, with
        /// <c>t[0]</c> in element 0.
        /// </summary>
        /// <param name="t">Source words; at least four are required.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<uint> Load128_UInt32(ReadOnlySpan<uint> t)
        {
            // When the BouncyCastle Vector helper reports a packed little-endian layout,
            // the span's bytes are reinterpreted directly; otherwise the vector is
            // assembled element by element.
            return Vector.IsPackedLittleEndian
                ? MemoryMarshal.Read<Vector128<uint>>(MemoryMarshal.AsBytes(t))
                : Vector128.Create(t[0], t[1], t[2], t[3]);
        }

        /// <summary>
        /// Writes the 16 bytes of <paramref name="s"/> to the start of
        /// <paramref name="t"/> in little-endian order.
        /// </summary>
        /// <param name="s">Vector to serialise.</param>
        /// <param name="t">Destination bytes; at least sixteen are required.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void Store128_UInt32(Vector128<uint> s, Span<byte> t)
        {
            if (Vector.IsPackedLittleEndian)
            {
                MemoryMarshal.Write(t, ref s);
                return;
            }

            // Fallback: emit the two 64-bit halves explicitly as little-endian.
            Vector128<ulong> halves = s.AsUInt64();
            BinaryPrimitives.WriteUInt64LittleEndian(t[..8], halves.GetElement(0));
            BinaryPrimitives.WriteUInt64LittleEndian(t[8..], halves.GetElement(1));
        }
    }
}