<PackageReference Include="System.Text.Encodings.Web" Version="6.0.0-preview.2.21154.6" />

UnicodeHelpers

static class UnicodeHelpers
using System.Buffers;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Text.Unicode
{
    /// <summary>
    /// Internal helpers for converting between Unicode scalar values and their UTF-8 / UTF-16
    /// encodings, and for querying a bitmap that holds one bit per UTF-16 code unit.
    /// </summary>
    internal static class UnicodeHelpers
    {
        // Written as an escape so the value survives any re-encoding of this source file.
        // It is the value (65533) handed back whenever a sequence cannot be decoded.
        private const char UNICODE_REPLACEMENT_CHAR = '\uFFFD';

        /// <summary>The constant 0x10FFFF (1114111).</summary>
        internal const int UNICODE_LAST_CODEPOINT = 0x10FFFF;

        // Only materialized on big-endian machines; on little-endian machines the raw bytes
        // are reinterpreted in place (see GetDefinedCharacterBitmap) and this stays null.
        private static readonly uint[] _definedCharacterBitmapBigEndian =
            BitConverter.IsLittleEndian ? null : CreateDefinedCharacterBitmapMachineEndian();

        // 8192 bytes = 65536 bits, i.e. one bit per possible char value.
        // NOTE(review): the operand below is a decompiler rendering of a compiler-generated
        // static data field and is not valid C# as written; the original source presumably
        // used a byte-array initializer. The table data itself is not present in this file,
        // so the reference is preserved verbatim - confirm before attempting to build.
        private static unsafe ReadOnlySpan<byte> DefinedCharsBitmapSpan => new ReadOnlySpan<byte>(&global::<PrivateImplementationDetails>.8B30AFDCF07C4ABDFE0FAF65F79FC40A2E9AC497C42B1BA5C996BDFB3F6EC2F6, 8192);

        /// <summary>
        /// Copies the byte bitmap into a <see cref="uint"/> array, reading every group of four
        /// bytes as a little-endian 32-bit value.
        /// </summary>
        /// <returns>A new array with one element per four bytes of the bitmap.</returns>
        private static uint[] CreateDefinedCharacterBitmapMachineEndian()
        {
            ReadOnlySpan<byte> bytes = DefinedCharsBitmapSpan;
            uint[] words = new uint[bytes.Length / sizeof(uint)];

            for (int i = 0; i < words.Length; i++)
            {
                words[i] = BinaryPrimitives.ReadUInt32LittleEndian(bytes.Slice(i * sizeof(uint)));
            }

            return words;
        }

        /// <summary>
        /// Decodes the scalar value encoded at the start of a UTF-8 buffer.
        /// </summary>
        /// <param name="source">The bytes to decode; only the first one to four bytes are examined.</param>
        /// <param name="result">
        /// The decoded scalar value on success; otherwise U+FFFD.
        /// </param>
        /// <param name="bytesConsumed">
        /// On success, the length (1..4) of the decoded sequence. On
        /// <see cref="OperationStatus.InvalidData"/>, the number of bytes examined before the
        /// sequence was rejected (at least 1). On <see cref="OperationStatus.NeedMoreData"/>,
        /// the number of bytes that were available and accepted (0..3).
        /// </param>
        /// <returns>
        /// <see cref="OperationStatus.Done"/>, <see cref="OperationStatus.InvalidData"/>, or
        /// <see cref="OperationStatus.NeedMoreData"/> when the buffer ends mid-sequence.
        /// </returns>
        public static OperationStatus DecodeScalarValueFromUtf8(ReadOnlySpan<byte> source, out uint result, out int bytesConsumed)
        {
            // Once the second byte has been folded in, the accumulator equals
            // ((lead - 0xC0) << 6) + (second - 0x80), so a [lead, second] byte pair can be
            // range-checked with a single comparison against the values below.
            const uint FirstThreeOrFourBytePair = ((0xE0 - 0xC0) << 6) + (0xA0 - 0x80); // [E0 A0] = 2080
            const uint LastThreeOrFourBytePair = ((0xF4 - 0xC0) << 6) + (0x8F - 0x80);  // [F4 8F] = 3343
            const uint FirstSurrogatePair = ((0xED - 0xC0) << 6) + (0xA0 - 0x80);       // [ED A0] = 2912
            const uint LastSurrogatePair = ((0xED - 0xC0) << 6) + (0xBF - 0x80);        // [ED BF] = 2943
            const uint FirstOverlongFourBytePair = ((0xF0 - 0xC0) << 6) + (0x80 - 0x80); // [F0 80] = 3072
            const uint LastOverlongFourBytePair = ((0xF0 - 0xC0) << 6) + (0x8F - 0x80);  // [F0 8F] = 3087

            int index = 0;

            // ---- first byte ----
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            uint value = source[index];
            if (System.Text.UnicodeUtility.IsAsciiCodePoint(value))
            {
                goto Finish;
            }

            if (!System.Text.UnicodeUtility.IsInRangeInclusive(value, 0xC2, 0xF4))
            {
                // Not a usable lead byte: report exactly one invalid byte.
                index = 1;
                goto Invalid;
            }

            value = (value - 0xC2) << 6;

            // ---- second byte ----
            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            // Sign-extended so that the continuation range 0x80..0xBF maps to -128..-65.
            int continuation = (sbyte)source[index];
            if (continuation >= -64)
            {
                goto Invalid;
            }

            value = (uint)((int)value + continuation);
            value += 0x80; // undo the -128 bias introduced by the sign extension
            value += 0x80; // (0xC2 - 0xC0) << 6: rebases the lead byte from 0xC2 to 0xC0

            if (value < 0x0800)
            {
                goto Finish; // complete two-byte sequence
            }

            // The first two bytes are sufficient to reject out-of-range, surrogate and
            // overlong forms before reading any further.
            if (!System.Text.UnicodeUtility.IsInRangeInclusive(value, FirstThreeOrFourBytePair, LastThreeOrFourBytePair))
            {
                goto Invalid;
            }

            if (System.Text.UnicodeUtility.IsInRangeInclusive(value, FirstSurrogatePair, LastSurrogatePair))
            {
                goto Invalid;
            }

            if (System.Text.UnicodeUtility.IsInRangeInclusive(value, FirstOverlongFourBytePair, LastOverlongFourBytePair))
            {
                goto Invalid;
            }

            // ---- third byte ----
            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            continuation = (sbyte)source[index];
            if (continuation >= -64)
            {
                goto Invalid;
            }

            value <<= 6;
            value = (uint)((int)value + continuation);
            value += 0x80;
            value -= 0x20000; // (0xE0 - 0xC0) << 12: strips the three-byte lead marker

            if (value <= 0xFFFF)
            {
                goto Finish; // complete three-byte sequence
            }

            // ---- fourth byte ----
            index++;
            if ((uint)index >= (uint)source.Length)
            {
                goto NeedsMoreData;
            }

            continuation = (sbyte)source[index];
            if (continuation >= -64)
            {
                goto Invalid;
            }

            value <<= 6;
            value = (uint)((int)value + continuation);
            value += 0x80;
            value -= 0x400000; // (0xF0 - 0xE0) << 18: strips the four-byte lead marker

        Finish:
            bytesConsumed = index + 1;
            result = value;
            return OperationStatus.Done;

        Invalid:
            bytesConsumed = index;
            result = UNICODE_REPLACEMENT_CHAR;
            return OperationStatus.InvalidData;

        NeedsMoreData:
            bytesConsumed = index;
            result = UNICODE_REPLACEMENT_CHAR;
            return OperationStatus.NeedMoreData;
        }

        /// <summary>
        /// Returns the bitmap as 32-bit words: a reinterpretation of the raw bytes on
        /// little-endian machines, or the pre-converted array otherwise.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static ReadOnlySpan<uint> GetDefinedCharacterBitmap()
        {
            if (BitConverter.IsLittleEndian)
            {
                return MemoryMarshal.Cast<byte, uint>(DefinedCharsBitmapSpan);
            }

            return _definedCharacterBitmapBigEndian;
        }

        /// <summary>
        /// Decodes one scalar value from a UTF-16 code unit and its optional successor.
        /// </summary>
        /// <param name="first">The code unit to decode.</param>
        /// <param name="second">The following code unit, or <c>null</c> if none is available.</param>
        /// <param name="wasSurrogatePair">Set to <c>true</c> only when both code units formed a valid pair.</param>
        /// <param name="needsMoreData">
        /// Set to <c>true</c> only when <paramref name="first"/> is a high surrogate and
        /// <paramref name="second"/> has no value.
        /// </param>
        /// <returns>The decoded scalar value, or U+FFFD when the input is not well formed.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static int GetScalarValueFromUtf16(char first, char? second, out bool wasSurrogatePair, out bool needsMoreData)
        {
            if (char.IsSurrogate(first))
            {
                return GetScalarValueFromUtf16Slow(first, second, out wasSurrogatePair, out needsMoreData);
            }

            // Fast path: a non-surrogate code unit is its own scalar value.
            wasSurrogatePair = false;
            needsMoreData = false;
            return first;
        }

        /// <summary>
        /// Surrogate handling for <see cref="GetScalarValueFromUtf16(char, char?, out bool, out bool)"/>.
        /// Anything other than a high surrogate followed by a low surrogate yields U+FFFD.
        /// </summary>
        private static int GetScalarValueFromUtf16Slow(char first, char? second, out bool wasSurrogatePair, out bool needMoreData)
        {
            wasSurrogatePair = false;
            needMoreData = false;

            if (!char.IsHighSurrogate(first))
            {
                return UNICODE_REPLACEMENT_CHAR;
            }

            if (!second.HasValue)
            {
                // A high surrogate with nothing after it: the pair may still be completed.
                needMoreData = true;
                return UNICODE_REPLACEMENT_CHAR;
            }

            if (!char.IsLowSurrogate(second.Value))
            {
                return UNICODE_REPLACEMENT_CHAR;
            }

            wasSurrogatePair = true;
            return GetScalarValueFromUtf16SurrogatePair(first, second.Value);
        }

        /// <summary>
        /// Decodes one scalar value from the UTF-16 data at <paramref name="pChar"/>.
        /// </summary>
        /// <param name="pChar">Pointer to the code unit to decode.</param>
        /// <param name="endOfString">
        /// <c>true</c> if there is no code unit after <paramref name="pChar"/>. When
        /// <c>false</c> and <c>*pChar</c> is a high surrogate, <c>pChar[1]</c> is read, so it
        /// must be readable.
        /// </param>
        /// <returns>The decoded scalar value, or U+FFFD when the input is not well formed.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static unsafe int GetScalarValueFromUtf16(char* pChar, bool endOfString)
        {
            char first = *pChar;
            return char.IsSurrogate(first)
                ? GetScalarValueFromUtf16Slow(pChar, endOfString)
                : first;
        }

        /// <summary>
        /// Surrogate handling for <see cref="GetScalarValueFromUtf16(char*, bool)"/>.
        /// </summary>
        private static unsafe int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
        {
            char first = *pChar;

            if (!char.IsSurrogate(first))
            {
                return first;
            }

            // A lone low surrogate, or a high surrogate with nothing after it, cannot be decoded.
            if (!char.IsHighSurrogate(first) || endOfString)
            {
                return UNICODE_REPLACEMENT_CHAR;
            }

            char second = pChar[1];
            return char.IsLowSurrogate(second)
                ? GetScalarValueFromUtf16SurrogatePair(first, second)
                : UNICODE_REPLACEMENT_CHAR;
        }

        /// <summary>
        /// Combines the low ten bits of each surrogate into a scalar value. No validation is
        /// performed here; callers check both code units first.
        /// </summary>
        private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
        {
            // Adding 0x40 before the shift contributes the 0x10000 supplementary-plane offset.
            int highBits = ((highSurrogate & 0x3FF) + 0x40) << 10;
            int lowBits = lowSurrogate & 0x3FF;
            return lowBits | highBits;
        }

        /// <summary>
        /// Splits a scalar value into a UTF-16 high/low surrogate pair.
        /// </summary>
        /// <param name="scalar">
        /// The value to split. No range check is performed; the arithmetic only yields a
        /// meaningful pair for values above 0xFFFF.
        /// </param>
        /// <param name="highSurrogate">Receives <c>0xD800 | (((scalar &gt;&gt; 16) - 1) &lt;&lt; 6) | ((scalar &amp; 0xFFFF) &gt;&gt; 10)</c>.</param>
        /// <param name="lowSurrogate">Receives <c>0xDC00 | (scalar &amp; 0x3FF)</c>.</param>
        internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
        {
            int lowSixteen = scalar & 0xFFFF;
            int plane = scalar >> 16;
            int planeMinusOne = plane - 1;

            highSurrogate = (char)(0xD800 | (planeMinusOne << 6) | (lowSixteen >> 10));
            lowSurrogate = (char)(0xDC00 | (lowSixteen & 0x3FF));
        }

        /// <summary>
        /// Encodes a scalar value as UTF-8 and packs the bytes into an <see cref="int"/>, with
        /// the first byte of the sequence in the lowest eight bits and each following byte in
        /// the next higher eight bits.
        /// </summary>
        /// <param name="scalar">The value to encode. It is not validated.</param>
        /// <returns>The packed one- to four-byte encoding.</returns>
        internal static int GetUtf8RepresentationForScalarValue(uint scalar)
        {
            if (scalar <= 0x7F)
            {
                return (byte)scalar;
            }

            if (scalar <= 0x7FF)
            {
                byte lead = (byte)(0xC0 | (scalar >> 6));
                byte trail = (byte)(0x80 | (scalar & 0x3F));
                return lead | (trail << 8);
            }

            if (scalar <= 0xFFFF)
            {
                byte lead = (byte)(0xE0 | (scalar >> 12));
                byte middle = (byte)(0x80 | ((scalar >> 6) & 0x3F));
                byte trail = (byte)(0x80 | (scalar & 0x3F));
                return lead | (middle << 8) | (trail << 16);
            }

            byte first = (byte)(0xF0 | (scalar >> 18));
            byte second = (byte)(0x80 | ((scalar >> 12) & 0x3F));
            byte third = (byte)(0x80 | ((scalar >> 6) & 0x3F));
            byte fourth = (byte)(0x80 | (scalar & 0x3F));
            return first | (second << 8) | (third << 16) | (fourth << 24);
        }

        /// <summary>
        /// Reports whether the bit belonging to <paramref name="c"/> is set in the bitmap
        /// returned by <see cref="GetDefinedCharacterBitmap"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool IsCharacterDefined(char c)
        {
            // Each 32-bit word covers 32 consecutive char values.
            int wordIndex = c >> 5;
            int bitIndex = c & 0x1F;
            return ((GetDefinedCharacterBitmap()[wordIndex] >> bitIndex) & 1) != 0;
        }

        /// <summary>
        /// Returns <c>true</c> when any bit above the low sixteen is set, i.e. the value does
        /// not fit in a single UTF-16 code unit.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool IsSupplementaryCodePoint(int scalar)
        {
            return (scalar & ~0xFFFF) != 0;
        }

        /// <summary>
        /// Returns <c>true</c> when <paramref name="value"/> lies in 0x80..0xBF.
        /// </summary>
        // NOTE(review): "[In] [IsReadOnly] ref" appears to be the decompiled form of an "in"
        // parameter. It is left as-is so existing call sites keep their argument syntax;
        // confirm against the callers before changing it.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool IsUtf8ContinuationByte([In] [IsReadOnly] ref byte value)
        {
            // Reinterpreted as signed, 0x80..0xBF becomes -128..-65.
            return (sbyte)value < -64;
        }
    }
}