<PackageReference Include="System.Text.Encodings.Web" Version="4.6.0-preview6.19264.9" />

UnicodeHelpers

static class UnicodeHelpers
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Text.Unicode
{
    /// <summary>
    /// Internal helpers for converting between UTF-16 code units, Unicode scalar
    /// values and UTF-8 byte sequences, and for querying a bitmap of defined
    /// characters.
    /// </summary>
    internal static class UnicodeHelpers
    {
        /// <summary>
        /// U+FFFD, returned by the UTF-16 decoding helpers whenever the input is
        /// not a well-formed surrogate pair. Written as an escape so the value
        /// survives any re-encoding of this source file.
        /// </summary>
        private const char UNICODE_REPLACEMENT_CHAR = '\uFFFD';

        /// <summary>The highest code point value (U+10FFFF).</summary>
        internal const int UNICODE_LAST_CODEPOINT = 0x10FFFF;

        /// <summary>
        /// Bitmap converted to <see cref="uint"/> words for big-endian machines.
        /// Left <c>null</c> on little-endian machines, where the raw bytes are
        /// reinterpreted in place instead (see <see cref="GetDefinedCharacterBitmap"/>).
        /// </summary>
        private static readonly uint[] _definedCharacterBitmapBigEndian =
            BitConverter.IsLittleEndian ? null : CreateDefinedCharacterBitmapMachineEndian();

        /// <summary>
        /// The raw bitmap as 8192 bytes (65536 bits, i.e. one bit per possible
        /// <see cref="char"/> value, matching the indexing in
        /// <see cref="IsCharacterDefined"/>).
        /// </summary>
        // NOTE(review): the <PrivateImplementationDetails> reference below looks like
        // decompiler output pointing at compiler-generated static data; it is not valid
        // hand-written C#. Presumably the original source used an inline byte array
        // initializer - confirm and restore the data from the upstream source.
        private static unsafe ReadOnlySpan<byte> DefinedCharsBitmapSpan => new ReadOnlySpan<byte>(&global::<PrivateImplementationDetails>.B834DC34C55D9E54905161E0ED4AF27E7F1C49AF, 8192);

        /// <summary>
        /// Builds a <see cref="uint"/> array from the raw bitmap by reading each
        /// consecutive 4-byte group as a little-endian 32-bit value.
        /// </summary>
        /// <returns>An array with one element per 4 bytes of the raw bitmap.</returns>
        private static uint[] CreateDefinedCharacterBitmapMachineEndian()
        {
            ReadOnlySpan<byte> remaining = DefinedCharsBitmapSpan;
            uint[] words = new uint[remaining.Length / sizeof(uint)];

            for (int i = 0; i < words.Length; i++)
            {
                words[i] = BinaryPrimitives.ReadUInt32LittleEndian(remaining);
                remaining = remaining.Slice(sizeof(uint));
            }

            return words;
        }

        /// <summary>
        /// Returns the defined-character bitmap as 32-bit words.
        /// </summary>
        /// <returns>
        /// On little-endian machines, the raw bytes reinterpreted as
        /// <see cref="uint"/>; otherwise the pre-converted array.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static ReadOnlySpan<uint> GetDefinedCharacterBitmap()
        {
            if (BitConverter.IsLittleEndian)
            {
                // The data is stored little-endian, so it can be viewed as uints directly.
                return MemoryMarshal.Cast<byte, uint>(DefinedCharsBitmapSpan);
            }

            return _definedCharacterBitmapBigEndian;
        }

        /// <summary>
        /// Decodes one scalar value from up to two UTF-16 code units.
        /// </summary>
        /// <param name="first">The first code unit.</param>
        /// <param name="second">The following code unit, or <c>null</c> if there is none.</param>
        /// <param name="wasSurrogatePair">
        /// Set to <c>true</c> only when <paramref name="first"/> and
        /// <paramref name="second"/> form a high/low surrogate pair.
        /// </param>
        /// <returns>
        /// <paramref name="first"/> itself if it is not a surrogate; the combined
        /// scalar value for a valid surrogate pair; otherwise U+FFFD.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static int GetScalarValueFromUtf16(char first, char? second, out bool wasSurrogatePair)
        {
            if (!char.IsSurrogate(first))
            {
                wasSurrogatePair = false;
                return first;
            }

            return GetScalarValueFromUtf16Slow(first, second, out wasSurrogatePair);
        }

        /// <summary>
        /// Surrogate path of <see cref="GetScalarValueFromUtf16(char, char?, out bool)"/>:
        /// yields the combined scalar for a high surrogate followed by a low
        /// surrogate, and U+FFFD for every other input.
        /// </summary>
        private static int GetScalarValueFromUtf16Slow(char first, char? second, out bool wasSurrogatePair)
        {
            if (char.IsHighSurrogate(first) && second.HasValue && char.IsLowSurrogate(second.Value))
            {
                wasSurrogatePair = true;
                return GetScalarValueFromUtf16SurrogatePair(first, second.Value);
            }

            // Unpaired high surrogate, or anything else that is not a valid pair.
            wasSurrogatePair = false;
            return UNICODE_REPLACEMENT_CHAR;
        }

        /// <summary>
        /// Decodes one scalar value starting at <paramref name="pChar"/>.
        /// </summary>
        /// <param name="pChar">Pointer to the first code unit; always dereferenced.</param>
        /// <param name="endOfString">
        /// When <c>true</c>, <c>pChar[1]</c> is never read and a high surrogate at
        /// <paramref name="pChar"/> decodes to U+FFFD.
        /// </param>
        /// <returns>
        /// The code unit itself if it is not a surrogate; the combined scalar value
        /// for a valid surrogate pair; otherwise U+FFFD.
        /// </returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static unsafe int GetScalarValueFromUtf16(char* pChar, bool endOfString)
        {
            char firstChar = *pChar;
            if (!char.IsSurrogate(firstChar))
            {
                return firstChar;
            }

            return GetScalarValueFromUtf16Slow(pChar, endOfString);
        }

        /// <summary>
        /// Surrogate path of <see cref="GetScalarValueFromUtf16(char*, bool)"/>.
        /// Also returns a non-surrogate first code unit unchanged.
        /// </summary>
        private static unsafe int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
        {
            char firstChar = *pChar;
            if (!char.IsSurrogate(firstChar))
            {
                return firstChar;
            }

            // The endOfString check must come before the pChar[1] read so the
            // second code unit is only touched when the caller says it exists.
            if (char.IsHighSurrogate(firstChar) && !endOfString)
            {
                char secondChar = pChar[1];
                if (char.IsLowSurrogate(secondChar))
                {
                    return GetScalarValueFromUtf16SurrogatePair(firstChar, secondChar);
                }
            }

            return UNICODE_REPLACEMENT_CHAR;
        }

        /// <summary>
        /// Combines a high and a low surrogate into a scalar value. Performs no
        /// validation of its arguments.
        /// </summary>
        private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
        {
            // Adding 0x40 before shifting left by 10 is the same as adding 0x10000
            // to the shifted high bits.
            return (lowSurrogate & 0x3FF) | (((highSurrogate & 0x3FF) + 0x40) << 10);
        }

        /// <summary>
        /// Splits a scalar value into a UTF-16 high/low surrogate pair.
        /// Performs no range check on <paramref name="scalar"/>.
        /// </summary>
        /// <param name="scalar">
        /// The scalar value to split; presumably callers only pass values above
        /// U+FFFF - verify against call sites.
        /// </param>
        /// <param name="highSurrogate">Receives the high surrogate (0xD800-based).</param>
        /// <param name="lowSurrogate">Receives the low surrogate (0xDC00-based).</param>
        internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
        {
            int low16Bits = scalar & 0xFFFF;
            int planeMinusOne = (scalar >> 16) - 1;

            highSurrogate = (char)(0xD800 | (planeMinusOne << 6) | (low16Bits >> 10));
            lowSurrogate = (char)(0xDC00 | (low16Bits & 0x3FF));
        }

        /// <summary>
        /// Encodes <paramref name="scalar"/> as UTF-8 and packs the bytes into a
        /// single <see cref="int"/>.
        /// </summary>
        /// <param name="scalar">The value to encode; not validated.</param>
        /// <returns>
        /// The 1 to 4 encoded bytes, with the lead byte in the least significant
        /// 8 bits and each following byte in the next higher 8 bits.
        /// </returns>
        internal static int GetUtf8RepresentationForScalarValue(uint scalar)
        {
            if (scalar <= 0x7F)
            {
                // One byte: 0xxxxxxx
                return (byte)scalar;
            }

            if (scalar <= 0x7FF)
            {
                // Two bytes: 110xxxxx 10xxxxxx
                byte lead = (byte)(0xC0 | (scalar >> 6));
                byte trail = (byte)(0x80 | (scalar & 0x3F));
                return (trail << 8) | lead;
            }

            if (scalar <= 0xFFFF)
            {
                // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                byte lead = (byte)(0xE0 | (scalar >> 12));
                byte middle = (byte)(0x80 | ((scalar >> 6) & 0x3F));
                byte trail = (byte)(0x80 | (scalar & 0x3F));
                return (((trail << 8) | middle) << 8) | lead;
            }

            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            byte first = (byte)(0xF0 | (scalar >> 18));
            byte second = (byte)(0x80 | ((scalar >> 12) & 0x3F));
            byte third = (byte)(0x80 | ((scalar >> 6) & 0x3F));
            byte fourth = (byte)(0x80 | (scalar & 0x3F));
            return (((((fourth << 8) | third) << 8) | second) << 8) | first;
        }

        /// <summary>
        /// Tests the bit for <paramref name="c"/> in the defined-character bitmap.
        /// </summary>
        /// <param name="c">The UTF-16 code unit to look up.</param>
        /// <returns><c>true</c> if the corresponding bit is set.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool IsCharacterDefined(char c)
        {
            // Each 32-bit word covers 32 consecutive char values.
            int wordIndex = c >> 5;
            int bitOffset = c & 0x1F;
            return ((GetDefinedCharacterBitmap()[wordIndex] >> bitOffset) & 1u) != 0;
        }

        /// <summary>
        /// Returns <c>true</c> when <paramref name="scalar"/> has any bit set above
        /// the low 16 bits, i.e. it does not fit in a single <see cref="char"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool IsSupplementaryCodePoint(int scalar)
        {
            return (scalar & ~(int)char.MaxValue) != 0;
        }
    }
}