<PackageReference Include="BouncyCastle.Cryptography" Version="2.5.1" />

GcmBlockCipher

Implements the Galois/Counter mode (GCM) detailed in NIST Special Publication 800-38D.
using Org.BouncyCastle.Crypto.Engines;
using Org.BouncyCastle.Crypto.Modes.Gcm;
using Org.BouncyCastle.Crypto.Parameters;
using Org.BouncyCastle.Crypto.Utilities;
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using Org.BouncyCastle.Utilities;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Org.BouncyCastle.Crypto.Modes
{
    /// <summary>
    /// Galois/Counter Mode (GCM) AEAD wrapper around a 16-byte block cipher.
    /// </summary>
    /// <remarks>
    /// The instance buffers input in <c>bufBlock</c>, keeps the running GHASH value in <c>S</c>
    /// and hashes associated data separately in <c>S_at</c> so that AAD may still be supplied
    /// after payload processing has started. When decrypting, the last <c>macSize</c> bytes
    /// seen are always held back in the buffer because they may be the tag.
    /// </remarks>
    public sealed class GcmBlockCipher : IAeadBlockCipher, IAeadCipher
    {
        // Shuffle mask that reverses the 16 bytes of a vector; applied before and after
        // the carry-less multiplications in the four-block paths.
        private static readonly Vector128<byte> ReverseBytesMask = Vector128.Create(
            (byte)15, (byte)14, (byte)13, (byte)12, (byte)11, (byte)10, (byte)9, (byte)8,
            (byte)7, (byte)6, (byte)5, (byte)4, (byte)3, (byte)2, (byte)1, (byte)0);

        private const int BlockSize = 16;

        // Value the per-message block budget is reset to (4294967294 = 2^32 - 2), which
        // corresponds to the plaintext length limit given in NIST SP 800-38D.
        private const uint MaxBlocksPerInvocation = uint.MaxValue - 1;

        private readonly IBlockCipher cipher;
        private readonly IGcmMultiplier multiplier;
        private IGcmExponentiator exp;

        private bool forEncryption;
        private bool initialised;
        private int macSize;
        private byte[] lastKey;
        private byte[] nonce;
        private byte[] initialAssociatedText;
        private byte[] H;
        private Vector128<ulong>[] HPow;
        private byte[] J0;

        private byte[] bufBlock;
        private byte[] macBlock;
        private byte[] S;
        private byte[] S_at;
        private byte[] S_atPre;
        private byte[] counter;
        private uint counter32;
        private uint blocksRemaining;
        private int bufOff;
        private ulong totalLength;
        private byte[] atBlock;
        private int atBlockPos;
        private ulong atLength;
        private ulong atLengthPre;

        /// <summary>True when the four-blocks-at-a-time PCLMULQDQ/SSSE3 path may be used.</summary>
        private static bool IsFourWaySupported =>
            Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled
            && Org.BouncyCastle.Runtime.Intrinsics.X86.Ssse3.IsEnabled
            && Vector.IsPacked;

        public string AlgorithmName => cipher.AlgorithmName + "/GCM";

        public IBlockCipher UnderlyingCipher => cipher;

        /// <summary>
        /// Returns a <see cref="BasicGcmMultiplier"/> when it reports hardware acceleration,
        /// otherwise a <see cref="Tables4kGcmMultiplier"/>.
        /// </summary>
        internal static IGcmMultiplier CreateGcmMultiplier()
        {
            if (BasicGcmMultiplier.IsHardwareAccelerated)
                return new BasicGcmMultiplier();

            return new Tables4kGcmMultiplier();
        }

        /// <summary>Creates a GCM cipher over <paramref name="c"/> using the default multiplier.</summary>
        public GcmBlockCipher(IBlockCipher c)
            : this(c, null)
        {
        }

        /// <summary>Creates a GCM cipher over <paramref name="c"/> with an explicit GHASH multiplier.</summary>
        /// <param name="c">Underlying block cipher; must have a 16-byte block size.</param>
        /// <param name="m">Multiplier to use, or <c>null</c> for <see cref="CreateGcmMultiplier"/>.</param>
        /// <exception cref="ArgumentException">The cipher's block size is not 16 bytes.</exception>
        [Obsolete("Will be removed")]
        public GcmBlockCipher(IBlockCipher c, IGcmMultiplier m)
        {
            if (c.GetBlockSize() != BlockSize)
                throw new ArgumentException("cipher required with a block size of " + BlockSize.ToString() + ".");

            if (m == null)
                m = CreateGcmMultiplier();

            cipher = c;
            multiplier = m;
        }

        public int GetBlockSize()
        {
            return BlockSize;
        }

        /// <summary>
        /// Initialises the cipher from <see cref="AeadParameters"/> or <see cref="ParametersWithIV"/>.
        /// </summary>
        /// <remarks>
        /// With <see cref="ParametersWithIV"/> the tag is 16 bytes and there is no initial associated
        /// text. The key may be omitted on re-initialisation, in which case the previous hash
        /// subkey is reused.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// Unsupported parameter type, invalid MAC size, empty IV, missing key on first use, or
        /// the same nonce being used again for encryption with the same (or no new) key.
        /// </exception>
        public void Init(bool forEncryption, ICipherParameters parameters)
        {
            this.forEncryption = forEncryption;
            macBlock = null;
            initialised = true;

            ReadOnlySpan<byte> newNonce;
            KeyParameter keyParam;

            if (parameters is AeadParameters aeadParameters)
            {
                newNonce = aeadParameters.Nonce;
                initialAssociatedText = aeadParameters.GetAssociatedText();

                int macSizeBits = aeadParameters.MacSize;
                if (macSizeBits < 32 || macSizeBits > 128 || macSizeBits % 8 != 0)
                    throw new ArgumentException("Invalid value for MAC size: " + macSizeBits.ToString());

                macSize = macSizeBits / 8;
                keyParam = aeadParameters.Key;
            }
            else if (parameters is ParametersWithIV parametersWithIV)
            {
                newNonce = parametersWithIV.IV;
                initialAssociatedText = null;
                macSize = 16;
                keyParam = (KeyParameter)parametersWithIV.Parameters;
            }
            else
            {
                throw new ArgumentException("invalid parameters passed to GCM");
            }

            // When decrypting the buffer must also be able to hold back a potential tag.
            int bufLength = forEncryption ? BlockSize : (BlockSize + macSize);
            bufBlock = new byte[bufLength];

            if (newNonce.Length < 1)
                throw new ArgumentException("IV must be at least 1 byte");

            if (forEncryption && nonce != null && newNonce.SequenceEqual(nonce))
            {
                if (keyParam == null || (lastKey != null && keyParam.FixedTimeEquals(lastKey)))
                    throw new ArgumentException("cannot reuse nonce for GCM encryption");
            }

            nonce = newNonce.ToArray();

            if (keyParam != null)
            {
                lastKey = keyParam.GetKey();

                cipher.Init(true, keyParam);

                // Hash subkey: encryption of the all-zero block.
                H = new byte[BlockSize];
                cipher.ProcessBlock(H, 0, H, 0);

                multiplier.Init(H);
                exp = null;

                if (IsFourWaySupported)
                {
                    Vector128<ulong> h1 = GcmUtilities.Load(H);
                    Vector128<ulong> h2 = GcmUtilities.Square(h1);
                    Vector128<ulong> h3 = GcmUtilities.Multiply(h1, h2);
                    Vector128<ulong> h4 = GcmUtilities.Square(h2);

                    // Highest power first, matching the order the four blocks are folded in.
                    HPow = new Vector128<ulong>[4] { h4, h3, h2, h1 };
                }
            }
            else if (H == null)
            {
                throw new ArgumentException("Key must be specified in initial Init");
            }

            J0 = new byte[BlockSize];

            if (nonce.Length == 12)
            {
                Array.Copy(nonce, 0, J0, 0, nonce.Length);
                J0[BlockSize - 1] = 1;
            }
            else
            {
                gHASH(J0, nonce, nonce.Length);

                byte[] lengthBlock = new byte[BlockSize];
                Pack.UInt64_To_BE((ulong)((long)nonce.Length * 8), lengthBlock, 8);
                gHASHBlock(J0, lengthBlock);
            }

            ResetStreamState();

            if (initialAssociatedText != null)
                ProcessAadBytes(initialAssociatedText);
        }

        /// <summary>Returns a copy of the last computed tag, or a zero-filled array of tag length if none.</summary>
        public byte[] GetMac()
        {
            if (macBlock != null)
                return (byte[])macBlock.Clone();

            return new byte[macSize];
        }

        public int GetOutputSize(int len)
        {
            int total = len + bufOff;

            if (forEncryption)
                return total + macSize;

            if (total >= macSize)
                return total - macSize;

            return 0;
        }

        public int GetUpdateOutputSize(int len)
        {
            int total = len + bufOff;

            if (!forEncryption)
            {
                if (total < macSize)
                    return 0;

                total -= macSize;
            }

            return total - total % BlockSize;
        }

        public void ProcessAadByte(byte input)
        {
            CheckStatus();

            atBlock[atBlockPos] = input;
            if (++atBlockPos == BlockSize)
            {
                gHASHBlock(S_at, atBlock);
                atBlockPos = 0;
                atLength += BlockSize;
            }
        }

        public void ProcessAadBytes(byte[] inBytes, int inOff, int len)
        {
            ProcessAadBytes(inBytes.AsSpan(inOff, len));
        }

        public void ProcessAadBytes(ReadOnlySpan<byte> input)
        {
            CheckStatus();

            if (atBlockPos > 0)
            {
                int available = BlockSize - atBlockPos;
                if (input.Length < available)
                {
                    input.CopyTo(atBlock.AsSpan(atBlockPos));
                    atBlockPos += input.Length;
                    return;
                }

                input.Slice(0, available).CopyTo(atBlock.AsSpan(atBlockPos));
                gHASHBlock(S_at, atBlock);
                atLength += BlockSize;
                input = input.Slice(available);
            }

            while (input.Length >= BlockSize)
            {
                gHASHBlock(S_at, input);
                atLength += BlockSize;
                input = input.Slice(BlockSize);
            }

            // Keep the tail (possibly empty) for the next call.
            input.CopyTo(atBlock);
            atBlockPos = input.Length;
        }

        // Called once before the first payload block: snapshots the AAD hash accumulated so
        // far (including any partial AAD block) into S_atPre and seeds S with it.
        private void InitCipher()
        {
            if (atLength != 0)
            {
                Array.Copy(S_at, 0, S_atPre, 0, BlockSize);
                atLengthPre = atLength;
            }

            if (atBlockPos > 0)
            {
                gHASHPartial(S_atPre, atBlock, 0, atBlockPos);
                atLengthPre += (uint)atBlockPos;
            }

            if (atLengthPre != 0)
                Array.Copy(S_atPre, 0, S, 0, BlockSize);
        }

        public int ProcessByte(byte input, byte[] output, int outOff)
        {
            CheckStatus();

            bufBlock[bufOff] = input;
            if (++bufOff != bufBlock.Length)
                return 0;

            Check.OutputLength(output, outOff, BlockSize, "output buffer too short");

            if (blocksRemaining == 0)
                throw new InvalidOperationException("Attempt to process too many blocks");

            blocksRemaining--;

            if (totalLength == 0)
                InitCipher();

            if (forEncryption)
            {
                EncryptBlock(bufBlock, output.AsSpan(outOff));
                bufOff = 0;
            }
            else
            {
                DecryptBlock(bufBlock, output.AsSpan(outOff));

                // Slide the held-back tag candidate to the front of the buffer.
                Array.Copy(bufBlock, BlockSize, bufBlock, 0, macSize);
                bufOff = macSize;
            }

            totalLength += BlockSize;
            return BlockSize;
        }

        public int ProcessByte(byte input, Span<byte> output)
        {
            CheckStatus();

            bufBlock[bufOff] = input;
            if (++bufOff != bufBlock.Length)
                return 0;

            Check.OutputLength(output, BlockSize, "output buffer too short");

            if (blocksRemaining == 0)
                throw new InvalidOperationException("Attempt to process too many blocks");

            blocksRemaining--;

            if (totalLength == 0)
                InitCipher();

            if (forEncryption)
            {
                EncryptBlock(bufBlock, output);
                bufOff = 0;
            }
            else
            {
                DecryptBlock(bufBlock, output);

                // Slide the held-back tag candidate to the front of the buffer.
                Array.Copy(bufBlock, BlockSize, bufBlock, 0, macSize);
                bufOff = macSize;
            }

            totalLength += BlockSize;
            return BlockSize;
        }

        public int ProcessBytes(byte[] input, int inOff, int len, byte[] output, int outOff)
        {
            CheckStatus();

            Check.DataLength(input, inOff, len, "input buffer too short");

            return ProcessBytes(input.AsSpan(inOff, len), Spans.FromNullable(output, outOff));
        }

        /// <summary>
        /// Processes <paramref name="input"/>, writing every complete block that becomes available
        /// to <paramref name="output"/> and buffering the remainder.
        /// </summary>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The cipher is not initialised, or the per-message block budget would be exceeded.
        /// </exception>
        public int ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
        {
            CheckStatus();

            int resultLen = bufOff + input.Length;

            if (forEncryption)
            {
                resultLen &= -BlockSize;
                if (resultLen > 0)
                {
                    Check.OutputLength(output, resultLen, "output buffer too short");

                    uint blocksNeeded = (uint)resultLen >> 4;
                    if (blocksRemaining < blocksNeeded)
                        throw new InvalidOperationException("Attempt to process too many blocks");

                    blocksRemaining -= blocksNeeded;

                    if (totalLength == 0)
                        InitCipher();
                }

                if (bufOff > 0)
                {
                    int available = BlockSize - bufOff;
                    if (input.Length < available)
                    {
                        input.CopyTo(bufBlock.AsSpan(bufOff));
                        bufOff += input.Length;
                        return 0;
                    }

                    input.Slice(0, available).CopyTo(bufBlock.AsSpan(bufOff));
                    input = input.Slice(available);

                    EncryptBlock(bufBlock, output);
                    output = output.Slice(BlockSize);
                }

                if (!IsFourWaySupported || input.Length < 4 * BlockSize)
                {
                    while (input.Length >= 2 * BlockSize)
                    {
                        EncryptBlocks2(input, output);
                        input = input.Slice(2 * BlockSize);
                        output = output.Slice(2 * BlockSize);
                    }
                }
                else
                {
                    EncryptBlocks4(ref input, ref output);

                    if (input.Length >= 2 * BlockSize)
                    {
                        EncryptBlocks2(input, output);
                        input = input.Slice(2 * BlockSize);
                        output = output.Slice(2 * BlockSize);
                    }
                }

                if (input.Length >= BlockSize)
                {
                    EncryptBlock(input, output);
                    input = input.Slice(BlockSize);
                }

                bufOff = input.Length;
                input.CopyTo(bufBlock);
            }
            else
            {
                resultLen -= macSize;
                resultLen &= -BlockSize;
                if (resultLen > 0)
                {
                    Check.OutputLength(output, resultLen, "output buffer too short");

                    uint blocksNeeded = (uint)resultLen >> 4;
                    if (blocksRemaining < blocksNeeded)
                        throw new InvalidOperationException("Attempt to process too many blocks");

                    blocksRemaining -= blocksNeeded;

                    if (totalLength == 0)
                        InitCipher();
                }

                int available = bufBlock.Length - bufOff;
                if (input.Length < available)
                {
                    input.CopyTo(bufBlock.AsSpan(bufOff));
                    bufOff += input.Length;
                    return 0;
                }

                if (bufOff >= BlockSize)
                {
                    // A full block is already buffered in front of the held-back bytes.
                    DecryptBlock(bufBlock, output);
                    output = output.Slice(BlockSize);

                    bufOff -= BlockSize;
                    bufBlock.AsSpan(0, bufOff).CopyFrom(bufBlock.AsSpan(BlockSize));

                    available += BlockSize;
                    if (input.Length < available)
                    {
                        input.CopyTo(bufBlock.AsSpan(bufOff));
                        bufOff += input.Length;

                        totalLength += BlockSize;
                        return BlockSize;
                    }
                }

                // Thresholds include bufBlock.Length so that macSize bytes always stay unprocessed.
                int inLimit1 = bufBlock.Length;
                int inLimit2 = inLimit1 + BlockSize;
                int inLimit4 = inLimit1 + 3 * BlockSize;

                available = BlockSize - bufOff;
                input.Slice(0, available).CopyTo(bufBlock.AsSpan(bufOff));
                input = input.Slice(available);

                DecryptBlock(bufBlock, output);
                output = output.Slice(BlockSize);

                if (!IsFourWaySupported || input.Length < inLimit4)
                {
                    while (input.Length >= inLimit2)
                    {
                        DecryptBlocks2(input, output);
                        input = input.Slice(2 * BlockSize);
                        output = output.Slice(2 * BlockSize);
                    }
                }
                else
                {
                    DecryptBlocks4(ref input, ref output, inLimit4);

                    if (input.Length >= inLimit2)
                    {
                        DecryptBlocks2(input, output);
                        input = input.Slice(2 * BlockSize);
                        output = output.Slice(2 * BlockSize);
                    }
                }

                if (input.Length >= inLimit1)
                {
                    DecryptBlock(input, output);
                    input = input.Slice(BlockSize);
                }

                bufOff = input.Length;
                input.CopyTo(bufBlock);
            }

            totalLength += (uint)resultLen;
            return resultLen;
        }

        public int DoFinal(byte[] output, int outOff)
        {
            return DoFinal(output.AsSpan(outOff));
        }

        /// <summary>
        /// Processes any buffered data, then appends the tag (encryption) or verifies it (decryption).
        /// </summary>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        /// <exception cref="InvalidCipherTextException">
        /// When decrypting: fewer bytes than the tag length were supplied, or the tag does not match.
        /// </exception>
        public int DoFinal(Span<byte> output)
        {
            CheckStatus();

            int extra = bufOff;

            if (forEncryption)
            {
                Check.OutputLength(output, extra + macSize, "output buffer too short");
            }
            else
            {
                if (extra < macSize)
                    throw new InvalidCipherTextException("data too short");

                extra -= macSize;

                Check.OutputLength(output, extra, "output buffer too short");
            }

            if (totalLength == 0)
                InitCipher();

            if (extra > 0)
            {
                if (blocksRemaining == 0)
                    throw new InvalidOperationException("Attempt to process too many blocks");

                blocksRemaining--;

                ProcessPartial(bufBlock.AsSpan(0, extra), output);
            }

            atLength += (uint)atBlockPos;

            if (atLength > atLengthPre)
            {
                // AAD arrived after payload processing began: finish its hash, remove the part
                // already folded into S, and shift it past the payload by multiplying with
                // H raised to the number of payload blocks.
                if (atBlockPos > 0)
                    gHASHPartial(S_at, atBlock, 0, atBlockPos);

                if (atLengthPre != 0)
                    GcmUtilities.Xor(S_at, S_atPre);

                long payloadBlocks = (long)(((totalLength * 8) + 127) >> 7);

                byte[] hPowBlocks = new byte[BlockSize];
                if (exp == null)
                {
                    exp = new BasicGcmExponentiator();
                    exp.Init(H);
                }

                exp.ExponentiateX(payloadBlocks, hPowBlocks);

                GcmUtilities.Multiply(S_at, hPowBlocks);
                GcmUtilities.Xor(S, S_at);
            }

            // Final GHASH block: AAD length and payload length, both in bits, big-endian.
            Span<byte> lengthBlock = stackalloc byte[BlockSize];
            Pack.UInt64_To_BE(atLength * 8, lengthBlock);
            Pack.UInt64_To_BE(totalLength * 8, lengthBlock.Slice(8));

            gHASHBlock(S, lengthBlock);

            Span<byte> tag = stackalloc byte[BlockSize];
            cipher.ProcessBlock(J0, tag);
            GcmUtilities.Xor(tag, S);

            int resultLen = extra;

            macBlock = new byte[macSize];
            tag.Slice(0, macSize).CopyTo(macBlock);

            if (forEncryption)
            {
                macBlock.CopyTo(output.Slice(bufOff));
                resultLen += macSize;
            }
            else
            {
                Span<byte> receivedMac = stackalloc byte[macSize];
                bufBlock.AsSpan(extra, macSize).CopyTo(receivedMac);

                if (!Arrays.FixedTimeEquals(macBlock, receivedMac))
                    throw new InvalidCipherTextException("mac check in GCM failed");
            }

            Reset(false);

            return resultLen;
        }

        public void Reset()
        {
            Reset(true);
        }

        // After an encryption the instance is marked uninitialised so the nonce cannot be
        // reused without a new Init; after a decryption the initial AAD is replayed.
        private void Reset(bool clearMac)
        {
            ResetStreamState();

            if (bufBlock != null)
                Arrays.Fill(bufBlock, 0);

            if (clearMac)
                macBlock = null;

            if (forEncryption)
                initialised = false;
            else if (initialAssociatedText != null)
                ProcessAadBytes(initialAssociatedText);
        }

        // Returns the hash accumulators, counter and length tracking to their start-of-message values.
        private void ResetStreamState()
        {
            S = new byte[BlockSize];
            S_at = new byte[BlockSize];
            S_atPre = new byte[BlockSize];
            atBlock = new byte[BlockSize];
            atBlockPos = 0;
            atLength = 0;
            atLengthPre = 0;
            counter = Arrays.Clone(J0);
            counter32 = Pack.BE_To_UInt32(counter, 12);
            blocksRemaining = MaxBlocksPerInvocation;
            bufOff = 0;
            totalLength = 0;
        }

        private void DecryptBlock(ReadOnlySpan<byte> input, Span<byte> output)
        {
            Span<byte> ctrBlock = stackalloc byte[BlockSize];

            GetNextCtrBlock(ctrBlock);
            DecryptAndHashBlock(input, ctrBlock, output);
        }

        private void DecryptBlocks2(ReadOnlySpan<byte> input, Span<byte> output)
        {
            Span<byte> ctrBlock = stackalloc byte[BlockSize];

            GetNextCtrBlock(ctrBlock);
            DecryptAndHashBlock(input, ctrBlock, output);

            input = input.Slice(BlockSize);
            output = output.Slice(BlockSize);

            GetNextCtrBlock(ctrBlock);
            DecryptAndHashBlock(input, ctrBlock, output);
        }

        // Decrypts 64 bytes per iteration while at least 'limit' bytes of input remain,
        // advancing both spans; 'limit' lets the caller hold back trailing bytes.
        private void DecryptBlocks4(ref ReadOnlySpan<byte> input, ref Span<byte> output, int limit)
        {
            if (!IsFourWaySupported)
                throw new PlatformNotSupportedException(nameof(DecryptBlocks4));
            if (limit < 4 * BlockSize)
                throw new ArgumentOutOfRangeException(nameof(limit));

            // Touch the last power up front (null/bounds check happens once, here).
            _ = HPow[3];

            Span<Vector128<byte>> counters = stackalloc Vector128<byte>[4];
            Span<byte> ctrBlocks = MemoryMarshal.AsBytes(counters);

            Vector128<byte> hash = MemoryMarshal.Read<Vector128<byte>>(S.AsSpan());
            hash = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(hash, ReverseBytesMask);

            while (input.Length >= limit)
            {
                // Fail before any state changes if either span is shorter than four blocks.
                _ = input[4 * BlockSize - 1];
                _ = output[4 * BlockSize - 1];

                GetNextCtrBlocks4(ctrBlocks);

                Vector128<byte> c0 = MemoryMarshal.Read<Vector128<byte>>(input);
                Vector128<byte> c1 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(BlockSize));
                Vector128<byte> c2 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(2 * BlockSize));
                Vector128<byte> c3 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(3 * BlockSize));

                Vector128<byte> p0 = System.Runtime.Intrinsics.X86.Sse2.Xor(c0, counters[0]);
                Vector128<byte> p1 = System.Runtime.Intrinsics.X86.Sse2.Xor(c1, counters[1]);
                Vector128<byte> p2 = System.Runtime.Intrinsics.X86.Sse2.Xor(c2, counters[2]);
                Vector128<byte> p3 = System.Runtime.Intrinsics.X86.Sse2.Xor(c3, counters[3]);

                MemoryMarshal.Write(output, ref p0);
                MemoryMarshal.Write(output.Slice(BlockSize), ref p1);
                MemoryMarshal.Write(output.Slice(2 * BlockSize), ref p2);
                MemoryMarshal.Write(output.Slice(3 * BlockSize), ref p3);

                input = input.Slice(4 * BlockSize);
                output = output.Slice(4 * BlockSize);

                // The hash always runs over the ciphertext, i.e. the input when decrypting.
                hash = GHashFourBlocks(hash, c0, c1, c2, c3);
            }

            hash = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(hash, ReverseBytesMask);
            MemoryMarshal.Write(S.AsSpan(), ref hash);
        }

        private void EncryptBlock(ReadOnlySpan<byte> input, Span<byte> output)
        {
            Span<byte> ctrBlock = stackalloc byte[BlockSize];

            GetNextCtrBlock(ctrBlock);
            EncryptAndHashBlock(input, ctrBlock, output);
        }

        private void EncryptBlocks2(ReadOnlySpan<byte> input, Span<byte> output)
        {
            Span<byte> ctrBlocks = stackalloc byte[2 * BlockSize];
            GetNextCtrBlocks2(ctrBlocks);

            EncryptAndHashBlock(input, ctrBlocks, output);

            input = input.Slice(BlockSize);
            output = output.Slice(BlockSize);

            EncryptAndHashBlock(input, ctrBlocks.Slice(BlockSize), output);
        }

        // Encrypts 64 bytes per iteration while at least four blocks of input remain,
        // advancing both spans.
        private void EncryptBlocks4(ref ReadOnlySpan<byte> input, ref Span<byte> output)
        {
            if (!IsFourWaySupported)
                throw new PlatformNotSupportedException(nameof(EncryptBlocks4));

            // Touch the last power up front (null/bounds check happens once, here).
            _ = HPow[3];

            Span<Vector128<byte>> counters = stackalloc Vector128<byte>[4];
            Span<byte> ctrBlocks = MemoryMarshal.AsBytes(counters);

            Vector128<byte> hash = MemoryMarshal.Read<Vector128<byte>>(S.AsSpan());
            hash = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(hash, ReverseBytesMask);

            while (input.Length >= 4 * BlockSize)
            {
                // Fail before any state changes if the output is shorter than four blocks.
                _ = output[4 * BlockSize - 1];

                GetNextCtrBlocks4(ctrBlocks);

                Vector128<byte> p0 = MemoryMarshal.Read<Vector128<byte>>(input);
                Vector128<byte> p1 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(BlockSize));
                Vector128<byte> p2 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(2 * BlockSize));
                Vector128<byte> p3 = MemoryMarshal.Read<Vector128<byte>>(input.Slice(3 * BlockSize));

                Vector128<byte> c0 = System.Runtime.Intrinsics.X86.Sse2.Xor(p0, counters[0]);
                Vector128<byte> c1 = System.Runtime.Intrinsics.X86.Sse2.Xor(p1, counters[1]);
                Vector128<byte> c2 = System.Runtime.Intrinsics.X86.Sse2.Xor(p2, counters[2]);
                Vector128<byte> c3 = System.Runtime.Intrinsics.X86.Sse2.Xor(p3, counters[3]);

                MemoryMarshal.Write(output, ref c0);
                MemoryMarshal.Write(output.Slice(BlockSize), ref c1);
                MemoryMarshal.Write(output.Slice(2 * BlockSize), ref c2);
                MemoryMarshal.Write(output.Slice(3 * BlockSize), ref c3);

                input = input.Slice(4 * BlockSize);
                output = output.Slice(4 * BlockSize);

                hash = GHashFourBlocks(hash, c0, c1, c2, c3);
            }

            hash = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(hash, ReverseBytesMask);
            MemoryMarshal.Write(S.AsSpan(), ref hash);
        }

        // output = input XOR ctrBlock; S = (S XOR output) * H. Works on exactly one block.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void EncryptAndHashBlock(ReadOnlySpan<byte> input, ReadOnlySpan<byte> ctrBlock, Span<byte> output)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                Vector128<byte> plain = MemoryMarshal.Read<Vector128<byte>>(input);
                Vector128<byte> encrypted = MemoryMarshal.Read<Vector128<byte>>(ctrBlock);
                Vector128<byte> hash = MemoryMarshal.Read<Vector128<byte>>(S.AsSpan());

                encrypted = System.Runtime.Intrinsics.X86.Sse2.Xor(encrypted, plain);
                hash = System.Runtime.Intrinsics.X86.Sse2.Xor(hash, encrypted);

                MemoryMarshal.Write(output, ref encrypted);
                MemoryMarshal.Write(S.AsSpan(), ref hash);
            }
            else
            {
                for (int i = 0; i < BlockSize; i += 4)
                {
                    byte c0 = (byte)(ctrBlock[i] ^ input[i]);
                    byte c1 = (byte)(ctrBlock[i + 1] ^ input[i + 1]);
                    byte c2 = (byte)(ctrBlock[i + 2] ^ input[i + 2]);
                    byte c3 = (byte)(ctrBlock[i + 3] ^ input[i + 3]);

                    S[i] ^= c0;
                    S[i + 1] ^= c1;
                    S[i + 2] ^= c2;
                    S[i + 3] ^= c3;

                    output[i] = c0;
                    output[i + 1] = c1;
                    output[i + 2] = c2;
                    output[i + 3] = c3;
                }
            }

            multiplier.MultiplyH(S);
        }

        // output = input XOR ctrBlock; S = (S XOR input) * H. Works on exactly one block.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void DecryptAndHashBlock(ReadOnlySpan<byte> input, ReadOnlySpan<byte> ctrBlock, Span<byte> output)
        {
            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && Vector.IsPacked)
            {
                Vector128<byte> encrypted = MemoryMarshal.Read<Vector128<byte>>(input);
                Vector128<byte> plain = MemoryMarshal.Read<Vector128<byte>>(ctrBlock);
                Vector128<byte> hash = MemoryMarshal.Read<Vector128<byte>>(S.AsSpan());

                plain = System.Runtime.Intrinsics.X86.Sse2.Xor(plain, encrypted);
                hash = System.Runtime.Intrinsics.X86.Sse2.Xor(hash, encrypted);

                MemoryMarshal.Write(output, ref plain);
                MemoryMarshal.Write(S.AsSpan(), ref hash);
            }
            else
            {
                for (int i = 0; i < BlockSize; i += 4)
                {
                    byte c0 = input[i];
                    byte c1 = input[i + 1];
                    byte c2 = input[i + 2];
                    byte c3 = input[i + 3];

                    S[i] ^= c0;
                    S[i + 1] ^= c1;
                    S[i + 2] ^= c2;
                    S[i + 3] ^= c3;

                    output[i] = (byte)(c0 ^ ctrBlock[i]);
                    output[i + 1] = (byte)(c1 ^ ctrBlock[i + 1]);
                    output[i + 2] = (byte)(c2 ^ ctrBlock[i + 2]);
                    output[i + 3] = (byte)(c3 ^ ctrBlock[i + 3]);
                }
            }

            multiplier.MultiplyH(S);
        }

        // Folds four ciphertext blocks into the (byte-reversed) accumulator: each block is
        // multiplied by its entry in HPow, the unreduced products are XORed together and
        // reduced once.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private Vector128<byte> GHashFourBlocks(Vector128<byte> hash, Vector128<byte> c0, Vector128<byte> c1,
            Vector128<byte> c2, Vector128<byte> c3)
        {
            Vector128<byte> r0 = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(c0, ReverseBytesMask);
            Vector128<byte> r1 = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(c1, ReverseBytesMask);
            Vector128<byte> r2 = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(c2, ReverseBytesMask);
            Vector128<byte> r3 = System.Runtime.Intrinsics.X86.Ssse3.Shuffle(c3, ReverseBytesMask);

            GcmUtilities.MultiplyExt(System.Runtime.Intrinsics.X86.Sse2.Xor(r0, hash).AsUInt64(), HPow[0],
                out Vector128<ulong> z0, out Vector128<ulong> z1, out Vector128<ulong> z2);
            GcmUtilities.MultiplyExt(r1.AsUInt64(), HPow[1],
                out Vector128<ulong> a0, out Vector128<ulong> a1, out Vector128<ulong> a2);
            GcmUtilities.MultiplyExt(r2.AsUInt64(), HPow[2],
                out Vector128<ulong> b0, out Vector128<ulong> b1, out Vector128<ulong> b2);
            GcmUtilities.MultiplyExt(r3.AsUInt64(), HPow[3],
                out Vector128<ulong> d0, out Vector128<ulong> d1, out Vector128<ulong> d2);

            z0 = System.Runtime.Intrinsics.X86.Sse2.Xor(z0, a0);
            z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(z1, a1);
            z2 = System.Runtime.Intrinsics.X86.Sse2.Xor(z2, a2);

            z0 = System.Runtime.Intrinsics.X86.Sse2.Xor(z0, b0);
            z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(z1, b1);
            z2 = System.Runtime.Intrinsics.X86.Sse2.Xor(z2, b2);

            z0 = System.Runtime.Intrinsics.X86.Sse2.Xor(z0, d0);
            z1 = System.Runtime.Intrinsics.X86.Sse2.Xor(z1, d1);
            z2 = System.Runtime.Intrinsics.X86.Sse2.Xor(z2, d2);

            return GcmUtilities.Reduce3(z0, z1, z2).AsByte();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void GetNextCtrBlock(Span<byte> block)
        {
            Pack.UInt32_To_BE(++counter32, counter, 12);

            cipher.ProcessBlock(counter, block);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void GetNextCtrBlocks2(Span<byte> blocks)
        {
            Pack.UInt32_To_BE(++counter32, counter, 12);
            cipher.ProcessBlock(counter, blocks);

            Pack.UInt32_To_BE(++counter32, counter, 12);
            cipher.ProcessBlock(counter, blocks.Slice(BlockSize));
        }

        // Produces the next four keystream blocks. With AesEngine_X86 the four counter blocks
        // are assembled in 'blocks' and encrypted in place by a single call.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void GetNextCtrBlocks4(Span<byte> blocks)
        {
            uint ctr0 = counter32;
            uint ctr1 = ctr0 + 1;
            uint ctr2 = ctr0 + 2;
            uint ctr3 = ctr0 + 3;
            uint ctr4 = ctr0 + 4;
            counter32 = ctr4;

            if (AesEngine_X86.IsSupported && cipher is AesEngine_X86 x86)
            {
                // The first three copies are taken before 'counter' is advanced, so only their
                // last four bytes need patching; the fourth copy is taken after the update.
                counter.CopyTo(blocks);
                counter.CopyTo(blocks.Slice(BlockSize));
                counter.CopyTo(blocks.Slice(2 * BlockSize));
                Pack.UInt32_To_BE(ctr4, counter, 12);
                Pack.UInt32_To_BE(ctr1, blocks.Slice(12));
                Pack.UInt32_To_BE(ctr2, blocks.Slice(28));
                Pack.UInt32_To_BE(ctr3, blocks.Slice(44));
                counter.CopyTo(blocks.Slice(3 * BlockSize));

                x86.ProcessFourBlocks(blocks, blocks);
                return;
            }

            Pack.UInt32_To_BE(ctr1, counter, 12);
            cipher.ProcessBlock(counter, blocks);

            Pack.UInt32_To_BE(ctr2, counter, 12);
            cipher.ProcessBlock(counter, blocks.Slice(BlockSize));

            Pack.UInt32_To_BE(ctr3, counter, 12);
            cipher.ProcessBlock(counter, blocks.Slice(2 * BlockSize));

            Pack.UInt32_To_BE(ctr4, counter, 12);
            cipher.ProcessBlock(counter, blocks.Slice(3 * BlockSize));
        }

        // Handles the final, shorter-than-a-block piece of payload in place and copies it out.
        private void ProcessPartial(Span<byte> partialBlock, Span<byte> output)
        {
            Span<byte> ctrBlock = stackalloc byte[BlockSize];
            GetNextCtrBlock(ctrBlock);

            if (forEncryption)
            {
                GcmUtilities.Xor(partialBlock, ctrBlock, partialBlock.Length);
                gHASHPartial(S, partialBlock);
            }
            else
            {
                // Hash the ciphertext before it is overwritten with plaintext.
                gHASHPartial(S, partialBlock);
                GcmUtilities.Xor(partialBlock, ctrBlock, partialBlock.Length);
            }

            partialBlock.CopyTo(output);
            totalLength += (uint)partialBlock.Length;
        }

        private void gHASH(byte[] Y, byte[] b, int len)
        {
            for (int pos = 0; pos < len; pos += BlockSize)
            {
                int num = System.Math.Min(len - pos, BlockSize);
                gHASHPartial(Y, b, pos, num);
            }
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void gHASHBlock(byte[] Y, ReadOnlySpan<byte> b)
        {
            GcmUtilities.Xor(Y, b);
            multiplier.MultiplyH(Y);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void gHASHPartial(byte[] Y, ReadOnlySpan<byte> b)
        {
            GcmUtilities.Xor(Y, b, b.Length);
            multiplier.MultiplyH(Y);
        }

        private void gHASHPartial(byte[] Y, byte[] b, int off, int len)
        {
            GcmUtilities.Xor(Y, b, off, len);
            multiplier.MultiplyH(Y);
        }

        private void CheckStatus()
        {
            if (initialised)
                return;

            if (forEncryption)
                throw new InvalidOperationException("GCM cipher cannot be reused for encryption");

            throw new InvalidOperationException("GCM cipher needs to be initialized");
        }
    }
}