// Haraka512_X86
using Org.BouncyCastle.Runtime.Intrinsics;
using Org.BouncyCastle.Runtime.Intrinsics.X86;
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Org.BouncyCastle.Crypto.Digests
{
public static class Haraka512_X86
{
/// <summary>
/// The 40 built-in 128-bit round constants, each written as 16 bytes.
/// The overloads of <c>Hash</c> and <c>Permute</c> that take no explicit constants pass
/// this table to <c>ImplRounds</c>, which consumes it in five consecutive groups of eight.
/// </summary>
/// <remarks>
/// Only the array reference is read-only; the elements themselves remain writable,
/// so code with access to this field must treat the contents as immutable.
/// </remarks>
internal static readonly Vector128<byte>[] DefaultRoundConstants = new Vector128<byte>[40] {
// Constants 0-7: first group of eight (rc.Slice(0, 8) in ImplRounds).
Vector128.Create(157, 123, 129, 117, 240, 254, 197, 178, 10, 192, 32, 230, 76, 112, 132, 6),
Vector128.Create(23, 247, 8, 47, 164, 107, 15, 100, 107, 160, 243, 136, 225, 180, 102, 139),
Vector128.Create(20, 145, 2, 159, 96, 157, 2, 207, 152, 132, 242, 83, 45, 222, 2, 52),
Vector128.Create(121, 79, 91, 253, 175, 188, 243, 187, 8, 79, 123, 46, 230, 234, 214, 14),
Vector128.Create(68, 112, 57, 190, 28, 205, 238, 121, 139, 68, 114, 72, 203, 176, 207, 203),
Vector128.Create(123, 5, 138, 43, 237, 53, 83, 141, 183, 50, 144, 110, 238, 205, 234, 126),
Vector128.Create(27, 239, 79, 218, 97, 39, 65, 226, 208, 124, 46, 94, 67, 143, 194, 103),
Vector128.Create(59, 11, 199, 31, 226, 253, 95, 103, 7, 204, 202, 175, 176, 217, 36, 41),
// Constants 8-15: second group of eight.
Vector128.Create(238, 101, 212, 185, 202, 143, 219, 236, 233, 127, 134, 230, 241, 99, 77, 171),
Vector128.Create(51, 126, 3, 173, 79, 64, 42, 91, 100, 205, 183, 212, 132, 191, 48, 28),
Vector128.Create(0, 152, 246, 141, 46, 139, 2, 105, 191, 35, 23, 148, 185, 11, 204, 178),
Vector128.Create(138, 45, 157, 92, 200, 158, 170, 74, 114, 85, 111, 222, 166, 120, 4, 250),
Vector128.Create(212, 159, 18, 41, 46, 79, 250, 14, 18, 42, 119, 107, 43, 159, 180, 223),
Vector128.Create(238, 18, 106, 187, 174, 17, 214, 50, 54, 162, 73, 244, 68, 3, 161, 30),
Vector128.Create(166, 236, 168, 156, 201, 0, 150, 95, 132, 0, 5, 75, 136, 73, 4, 175),
Vector128.Create(236, 147, 229, 39, 227, 199, 162, 120, 79, 156, 25, 157, 216, 94, 2, 33),
// Constants 16-23: third group of eight.
Vector128.Create(115, 1, 212, 130, 205, 46, 40, 185, 183, 201, 89, 167, 248, 170, 58, 191),
Vector128.Create(107, 125, 48, 16, 217, 239, 242, 55, 23, 176, 134, 97, 13, 112, 96, 98),
Vector128.Create(198, 154, 252, 246, 83, 145, 194, 129, 67, 4, 48, 33, 194, 69, 202, 90),
Vector128.Create(58, 148, 209, 54, 232, 146, 175, 44, 187, 104, 107, 34, 60, 151, 35, 146),
Vector128.Create(180, 113, 16, 229, 88, 185, 186, 108, 235, 134, 88, 34, 56, 146, 191, 211),
Vector128.Create(141, 18, 225, 36, 221, 253, 61, 147, 119, 198, 240, 174, 229, 60, 134, 219),
Vector128.Create(177, 18, 34, 203, 227, 141, 228, 131, 156, 160, 235, byte.MaxValue, 104, 98, 96, 187),
Vector128.Create(125, 247, 43, 199, 78, 26, 185, 45, 156, 209, 228, 226, 220, 211, 75, 115),
// Constants 24-31: fourth group of eight.
Vector128.Create(78, 146, 179, 44, 196, 21, 20, 75, 67, 27, 48, 97, 195, 71, 187, 67),
Vector128.Create(153, 104, 235, 22, 221, 49, 178, 3, 246, 239, 7, 231, 168, 117, 167, 219),
Vector128.Create(44, 71, 202, 126, 2, 35, 94, 142, 119, 89, 117, 60, 75, 97, 243, 109),
Vector128.Create(249, 23, 134, 184, 185, 229, 27, 109, 119, 125, 222, 214, 23, 90, 167, 205),
Vector128.Create(93, 238, 70, 169, 157, 6, 108, 157, 170, 233, 168, 107, 240, 67, 107, 236),
Vector128.Create(193, 39, 243, 59, 89, 17, 83, 162, 43, 51, 87, 249, 80, 105, 30, 203),
Vector128.Create(217, 208, 14, 96, 83, 3, 237, 228, 156, 97, 218, 0, 117, 12, 238, 44),
Vector128.Create(80, 163, 164, 99, 188, 186, 187, 128, 171, 12, 233, 150, 161, 165, 177, 240),
// Constants 32-39: fifth and last group of eight.
Vector128.Create(57, 202, 141, 147, 48, 222, 13, 171, 136, 41, 150, 94, 2, 177, 61, 174),
Vector128.Create(66, 180, 117, 46, 168, 243, 20, 136, 11, 164, 84, 213, 56, 143, 187, 23),
Vector128.Create(246, 22, 10, 54, 121, 183, 182, 174, 215, 127, 66, 95, 91, 138, 187, 52),
Vector128.Create(222, 175, 186, byte.MaxValue, 24, 89, 206, 67, 56, 84, 229, 203, 65, 82, 246, 38),
Vector128.Create(120, 201, 158, 131, 247, 156, 202, 162, 106, 2, 243, 185, 84, 154, 233, 76),
Vector128.Create(53, 18, 144, 34, 40, 110, 192, 64, 190, 247, 223, 27, 26, 165, 81, 174),
Vector128.Create(207, 89, 166, 72, 15, 188, 115, 193, 43, 210, 126, 186, 60, 97, 193, 160),
Vector128.Create(161, 157, 197, 233, 253, 189, 214, 74, 136, 130, 40, 2, 3, 204, 106, 117)
};
/// <summary>
/// Indicates whether this implementation may be used. The value is forwarded from
/// <c>Org.BouncyCastle.Runtime.Intrinsics.X86.Aes.IsEnabled</c>, which is declared outside this file.
/// Every public method of this class throws <see cref="PlatformNotSupportedException"/> when this is false.
/// </summary>
public static bool IsSupported
{
    get { return Org.BouncyCastle.Runtime.Intrinsics.X86.Aes.IsEnabled; }
}
/// <summary>
/// Hashes one 64-byte block to a 32-byte result using <see cref="DefaultRoundConstants"/>.
/// The block is run through <c>ImplRounds</c>, XORed with the original input, and truncated to
/// the upper halves of the first two 128-bit lanes followed by the lower halves of the last two.
/// </summary>
/// <param name="input">Source buffer; bytes 0..63 are consumed.</param>
/// <param name="output">Destination buffer; bytes 0..31 are written.</param>
/// <exception cref="PlatformNotSupportedException"><see cref="IsSupported"/> is false.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="input"/> is shorter than 64 bytes, or <paramref name="output"/> is shorter than 32 bytes.
/// </exception>
public static void Hash(ReadOnlySpan<byte> input, Span<byte> output)
{
    if (!IsSupported)
        throw new PlatformNotSupportedException(nameof(Haraka512_X86));

    // Validate both buffers before doing any work. Previously a short output buffer was only
    // detected by Slice after part of the result had already been written into it.
    if (input.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(input), "At least 64 bytes of input are required.");
    if (output.Length < 32)
        throw new ArgumentOutOfRangeException(nameof(output), "At least 32 bytes of output are required.");

    // Keep the message block around so it does not have to be re-read for the final XOR.
    Vector128<byte> m0 = Load128(input.Slice(0, 16));
    Vector128<byte> m1 = Load128(input.Slice(16, 16));
    Vector128<byte> m2 = Load128(input.Slice(32, 16));
    Vector128<byte> m3 = Load128(input.Slice(48, 16));

    Vector128<byte> s0 = m0;
    Vector128<byte> s1 = m1;
    Vector128<byte> s2 = m2;
    Vector128<byte> s3 = m3;
    ImplRounds(ref s0, ref s1, ref s2, ref s3, DefaultRoundConstants.AsSpan(0, 40));

    // XOR the permuted state with the original input block.
    s0 = System.Runtime.Intrinsics.X86.Sse2.Xor(s0, m0);
    s1 = System.Runtime.Intrinsics.X86.Sse2.Xor(s1, m1);
    s2 = System.Runtime.Intrinsics.X86.Sse2.Xor(s2, m2);
    s3 = System.Runtime.Intrinsics.X86.Sse2.Xor(s3, m3);

    // Truncate 64 -> 32 bytes: upper 8 bytes of lanes 0 and 1, lower 8 bytes of lanes 2 and 3.
    Store64(s0.GetUpper(), output.Slice(0, 8));
    Store64(s1.GetUpper(), output.Slice(8, 8));
    Store64(s2.GetLower(), output.Slice(16, 8));
    Store64(s3.GetLower(), output.Slice(24, 8));
}
/// <summary>
/// Hashes one 64-byte block to a 32-byte result using caller-supplied round constants.
/// The block is run through <c>ImplRounds</c>, XORed with the original input, and truncated to
/// the upper halves of the first two 128-bit lanes followed by the lower halves of the last two.
/// </summary>
/// <param name="input">Source buffer; bytes 0..63 are consumed.</param>
/// <param name="output">Destination buffer; bytes 0..31 are written.</param>
/// <param name="roundConstants">Round constants; the first 40 entries are used.</param>
/// <exception cref="PlatformNotSupportedException"><see cref="IsSupported"/> is false.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="input"/> is shorter than 64 bytes, <paramref name="roundConstants"/> has fewer
/// than 40 entries, or <paramref name="output"/> is shorter than 32 bytes.
/// </exception>
public static void Hash(ReadOnlySpan<byte> input, Span<byte> output, ReadOnlySpan<Vector128<byte>> roundConstants)
{
    if (!IsSupported)
        throw new PlatformNotSupportedException(nameof(Haraka512_X86));

    // Validate every buffer before doing any work. Previously a short output buffer was only
    // detected by Slice after part of the result had already been written into it.
    if (input.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(input), "At least 64 bytes of input are required.");
    if (roundConstants.Length < 40)
        throw new ArgumentOutOfRangeException(nameof(roundConstants), "At least 40 round constants are required.");
    if (output.Length < 32)
        throw new ArgumentOutOfRangeException(nameof(output), "At least 32 bytes of output are required.");

    // Keep the message block around so it does not have to be re-read for the final XOR.
    Vector128<byte> m0 = Load128(input.Slice(0, 16));
    Vector128<byte> m1 = Load128(input.Slice(16, 16));
    Vector128<byte> m2 = Load128(input.Slice(32, 16));
    Vector128<byte> m3 = Load128(input.Slice(48, 16));

    Vector128<byte> s0 = m0;
    Vector128<byte> s1 = m1;
    Vector128<byte> s2 = m2;
    Vector128<byte> s3 = m3;
    ImplRounds(ref s0, ref s1, ref s2, ref s3, roundConstants.Slice(0, 40));

    // XOR the permuted state with the original input block.
    s0 = System.Runtime.Intrinsics.X86.Sse2.Xor(s0, m0);
    s1 = System.Runtime.Intrinsics.X86.Sse2.Xor(s1, m1);
    s2 = System.Runtime.Intrinsics.X86.Sse2.Xor(s2, m2);
    s3 = System.Runtime.Intrinsics.X86.Sse2.Xor(s3, m3);

    // Truncate 64 -> 32 bytes: upper 8 bytes of lanes 0 and 1, lower 8 bytes of lanes 2 and 3.
    Store64(s0.GetUpper(), output.Slice(0, 8));
    Store64(s1.GetUpper(), output.Slice(8, 8));
    Store64(s2.GetLower(), output.Slice(16, 8));
    Store64(s3.GetLower(), output.Slice(24, 8));
}
/// <summary>
/// Runs one 64-byte block through <c>ImplRounds</c> with <see cref="DefaultRoundConstants"/>
/// and writes the complete 64-byte state. Unlike <c>Hash</c>, the result is neither XORed
/// with the input nor truncated.
/// </summary>
/// <param name="input">Source buffer; bytes 0..63 are consumed.</param>
/// <param name="output">Destination buffer; bytes 0..63 are written.</param>
/// <exception cref="PlatformNotSupportedException"><see cref="IsSupported"/> is false.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="input"/> or <paramref name="output"/> is shorter than 64 bytes.
/// </exception>
public static void Permute(ReadOnlySpan<byte> input, Span<byte> output)
{
    if (!IsSupported)
        throw new PlatformNotSupportedException(nameof(Haraka512_X86));

    // Validate both buffers before doing any work. Previously a short output buffer was only
    // detected by Slice after part of the state had already been written into it.
    if (input.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(input), "At least 64 bytes of input are required.");
    if (output.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(output), "At least 64 bytes of output are required.");

    // All four lanes are loaded before anything is stored.
    Vector128<byte> s0 = Load128(input.Slice(0, 16));
    Vector128<byte> s1 = Load128(input.Slice(16, 16));
    Vector128<byte> s2 = Load128(input.Slice(32, 16));
    Vector128<byte> s3 = Load128(input.Slice(48, 16));

    ImplRounds(ref s0, ref s1, ref s2, ref s3, DefaultRoundConstants.AsSpan(0, 40));

    Store128(s0, output.Slice(0, 16));
    Store128(s1, output.Slice(16, 16));
    Store128(s2, output.Slice(32, 16));
    Store128(s3, output.Slice(48, 16));
}
/// <summary>
/// Runs one 64-byte block through <c>ImplRounds</c> with caller-supplied round constants
/// and writes the complete 64-byte state. Unlike <c>Hash</c>, the result is neither XORed
/// with the input nor truncated.
/// </summary>
/// <param name="input">Source buffer; bytes 0..63 are consumed.</param>
/// <param name="output">Destination buffer; bytes 0..63 are written.</param>
/// <param name="roundConstants">Round constants; the first 40 entries are used.</param>
/// <exception cref="PlatformNotSupportedException"><see cref="IsSupported"/> is false.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="input"/> or <paramref name="output"/> is shorter than 64 bytes, or
/// <paramref name="roundConstants"/> has fewer than 40 entries.
/// </exception>
public static void Permute(ReadOnlySpan<byte> input, Span<byte> output, ReadOnlySpan<Vector128<byte>> roundConstants)
{
    if (!IsSupported)
        throw new PlatformNotSupportedException(nameof(Haraka512_X86));

    // Validate every buffer before doing any work. Previously a short output buffer was only
    // detected by Slice after part of the state had already been written into it.
    if (input.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(input), "At least 64 bytes of input are required.");
    if (roundConstants.Length < 40)
        throw new ArgumentOutOfRangeException(nameof(roundConstants), "At least 40 round constants are required.");
    if (output.Length < 64)
        throw new ArgumentOutOfRangeException(nameof(output), "At least 64 bytes of output are required.");

    // All four lanes are loaded before anything is stored.
    Vector128<byte> s0 = Load128(input.Slice(0, 16));
    Vector128<byte> s1 = Load128(input.Slice(16, 16));
    Vector128<byte> s2 = Load128(input.Slice(32, 16));
    Vector128<byte> s3 = Load128(input.Slice(48, 16));

    ImplRounds(ref s0, ref s1, ref s2, ref s3, roundConstants.Slice(0, 40));

    Store128(s0, output.Slice(0, 16));
    Store128(s1, output.Slice(16, 16));
    Store128(s2, output.Slice(32, 16));
    Store128(s3, output.Slice(48, 16));
}
/// <summary>
/// Applies five rounds to the four-lane state in place. Round <c>i</c> (0-based) receives the
/// eight constants <c>rc[8*i .. 8*i+7]</c>, so 40 constants are consumed in total.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ImplRounds(ref Vector128<byte> s0, ref Vector128<byte> s1, ref Vector128<byte> s2, ref Vector128<byte> s3, ReadOnlySpan<Vector128<byte>> rc)
{
    const int RoundCount = 5;
    const int ConstantsPerRound = 8;

    for (int round = 0; round < RoundCount; ++round)
    {
        ReadOnlySpan<Vector128<byte>> roundKeys = rc.Slice(round * ConstantsPerRound, ConstantsPerRound);
        ImplRound(ref s0, ref s1, ref s2, ref s3, roundKeys);
    }
}
/// <summary>
/// One round over the four-lane state: the AES layer (<c>ImplAes</c>) keyed with the first
/// eight entries of <paramref name="rc"/>, followed by the word shuffle (<c>ImplMix</c>).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ImplRound(ref Vector128<byte> s0, ref Vector128<byte> s1, ref Vector128<byte> s2, ref Vector128<byte> s3, ReadOnlySpan<Vector128<byte>> rc)
{
    // Narrow to exactly the eight constants this round uses.
    ReadOnlySpan<Vector128<byte>> roundKeys = rc.Slice(0, 8);

    ImplAes(ref s0, ref s1, ref s2, ref s3, roundKeys);
    ImplMix(ref s0, ref s1, ref s2, ref s3);
}
/// <summary>
/// Applies two AES encryption rounds to each of the four lanes in place.
/// Lane <c>i</c> is first encrypted with <c>rc[i]</c> and then with <c>rc[i + 4]</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ImplAes(ref Vector128<byte> s0, ref Vector128<byte> s1, ref Vector128<byte> s2, ref Vector128<byte> s3, ReadOnlySpan<Vector128<byte>> rc)
{
    // First AES round on every lane, using constants 0..3. The four operations are independent.
    Vector128<byte> t0 = System.Runtime.Intrinsics.X86.Aes.Encrypt(s0, rc[0]);
    Vector128<byte> t1 = System.Runtime.Intrinsics.X86.Aes.Encrypt(s1, rc[1]);
    Vector128<byte> t2 = System.Runtime.Intrinsics.X86.Aes.Encrypt(s2, rc[2]);
    Vector128<byte> t3 = System.Runtime.Intrinsics.X86.Aes.Encrypt(s3, rc[3]);

    // Second AES round on every lane, using constants 4..7; results go back into the state.
    s0 = System.Runtime.Intrinsics.X86.Aes.Encrypt(t0, rc[4]);
    s1 = System.Runtime.Intrinsics.X86.Aes.Encrypt(t1, rc[5]);
    s2 = System.Runtime.Intrinsics.X86.Aes.Encrypt(t2, rc[6]);
    s3 = System.Runtime.Intrinsics.X86.Aes.Encrypt(t3, rc[7]);
}
/// <summary>
/// Shuffles the sixteen 32-bit words of the four-lane state in place.
/// Writing the input lanes as a = s0, b = s1, c = s2, d = s3 (words indexed 0..3), the result is
/// s0 = [a3 c3 b3 d3], s1 = [c0 a0 d0 b0], s2 = [c1 a1 d1 b1], s3 = [a2 c2 b2 d2].
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ImplMix(ref Vector128<byte> s0, ref Vector128<byte> s1, ref Vector128<byte> s2, ref Vector128<byte> s3)
{
    Vector128<uint> a = s0.AsUInt32();
    Vector128<uint> b = s1.AsUInt32();
    Vector128<uint> c = s2.AsUInt32();
    Vector128<uint> d = s3.AsUInt32();

    // Interleave word pairs of (a, b) and of (c, d).
    Vector128<uint> abLow = System.Runtime.Intrinsics.X86.Sse2.UnpackLow(a, b);    // [a0 b0 a1 b1]
    Vector128<uint> abHigh = System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(a, b);  // [a2 b2 a3 b3]
    Vector128<uint> cdLow = System.Runtime.Intrinsics.X86.Sse2.UnpackLow(c, d);    // [c0 d0 c1 d1]
    Vector128<uint> cdHigh = System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(c, d);  // [c2 d2 c3 d3]

    // Second interleave produces the final word order for each lane.
    s0 = System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(abHigh, cdHigh).AsByte();
    s1 = System.Runtime.Intrinsics.X86.Sse2.UnpackLow(cdLow, abLow).AsByte();
    s2 = System.Runtime.Intrinsics.X86.Sse2.UnpackHigh(cdLow, abLow).AsByte();
    s3 = System.Runtime.Intrinsics.X86.Sse2.UnpackLow(abHigh, cdHigh).AsByte();
}
/// <summary>
/// Reads a 128-bit vector from the start of <paramref name="t"/>.
/// When <c>Vector.IsPackedLittleEndian</c> (declared outside this file) is true the bytes are
/// reinterpreted directly; otherwise the vector is assembled from two little-endian 64-bit reads.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> Load128(ReadOnlySpan<byte> t)
{
    if (!Vector.IsPackedLittleEndian)
    {
        // Portable path: bytes 0..7 become element 0, bytes 8.. become element 1.
        ulong low = BinaryPrimitives.ReadUInt64LittleEndian(t.Slice(0, 8));
        ulong high = BinaryPrimitives.ReadUInt64LittleEndian(t.Slice(8));
        return Vector128.Create(low, high).AsByte();
    }

    return MemoryMarshal.Read<Vector128<byte>>(t);
}
/// <summary>
/// Writes the 128-bit vector <paramref name="s"/> to the start of <paramref name="t"/>.
/// When <c>Vector.IsPackedLittleEndian</c> (declared outside this file) is true the vector is
/// copied directly; otherwise it is emitted as two little-endian 64-bit words.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Store128(Vector128<byte> s, Span<byte> t)
{
    if (!Vector.IsPackedLittleEndian)
    {
        // Portable path: element 0 goes to bytes 0..7, element 1 to bytes 8 onward.
        Vector128<ulong> words = s.AsUInt64();
        BinaryPrimitives.WriteUInt64LittleEndian(t.Slice(0, 8), words.GetElement(0));
        BinaryPrimitives.WriteUInt64LittleEndian(t.Slice(8), words.GetElement(1));
        return;
    }

    MemoryMarshal.Write(t, ref s);
}
/// <summary>
/// Writes the 64-bit vector <paramref name="s"/> to the start of <paramref name="t"/>.
/// When <c>Vector.IsPackedLittleEndian</c> (declared outside this file) is true the vector is
/// copied directly; otherwise it is emitted as a single little-endian 64-bit word.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Store64(Vector64<byte> s, Span<byte> t)
{
    if (!Vector.IsPackedLittleEndian)
    {
        // Portable path: write the whole vector as one 64-bit value.
        ulong word = s.AsUInt64().ToScalar();
        BinaryPrimitives.WriteUInt64LittleEndian(t, word);
        return;
    }

    MemoryMarshal.Write(t, ref s);
}
}
}