UnicodeHelpers
using System.Buffers;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace System.Text.Unicode
{
internal static class UnicodeHelpers
{
// Replacement character constant; not referenced by any member visible in this file.
// NOTE(review): the literal is presumably U+FFFD (the decode helpers in this class
// return 65533 == 0xFFFD on failure) - confirm the file encoding preserved it.
private const char UNICODE_REPLACEMENT_CHAR = '�';
// 1114111 == 0x10FFFF.
internal const int UNICODE_LAST_CODEPOINT = 1114111;
// Bitmap pre-converted to uint words by CreateDefinedCharacterBitmapMachineEndian().
// Only populated when BitConverter.IsLittleEndian is false; it is null on little-endian
// hosts, where GetDefinedCharacterBitmap() reinterprets the raw bytes instead.
private static readonly uint[] _definedCharacterBitmapBigEndian = BitConverter.IsLittleEndian ? null : CreateDefinedCharacterBitmapMachineEndian();
// 8192-byte span (8192 * 8 = 65536 bits) over a compiler-generated static data blob.
// IsCharacterDefined() reads it as 32-bit words: word index (c >> 5), bit (c & 31).
// NOTE(review): the "<PrivateImplementationDetails>" reference below is decompiler output,
// not valid C# source; the original declaration was presumably a byte-array literal - confirm
// and restore it before compiling this file.
private unsafe static ReadOnlySpan<byte> DefinedCharsBitmapSpan => new ReadOnlySpan<byte>(&global::<PrivateImplementationDetails>.8B30AFDCF07C4ABDFE0FAF65F79FC40A2E9AC497C42B1BA5C996BDFB3F6EC2F6, 8192);
/// <summary>
/// Builds a <see cref="uint"/> array from <c>DefinedCharsBitmapSpan</c> by reading each
/// consecutive group of four bytes as a little-endian 32-bit value.
/// </summary>
/// <returns>A new array holding one word per four bytes of the bitmap span.</returns>
private static uint[] CreateDefinedCharacterBitmapMachineEndian()
{
    ReadOnlySpan<byte> bitmapBytes = DefinedCharsBitmapSpan;
    uint[] words = new uint[bitmapBytes.Length / 4];

    // Walk the bytes with an explicit offset: word k is decoded from bytes [4k, 4k + 4).
    int byteOffset = 0;
    for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
    {
        words[wordIndex] = BinaryPrimitives.ReadUInt32LittleEndian(bitmapBytes.Slice(byteOffset));
        byteOffset += 4;
    }

    return words;
}
/// <summary>
/// Decodes the UTF-8 sequence at the start of <paramref name="source"/> into a scalar value.
/// </summary>
/// <param name="source">The UTF-8 bytes to read; may be empty.</param>
/// <param name="result">
/// The decoded scalar value when <see cref="OperationStatus.Done"/> is returned;
/// otherwise 0xFFFD.
/// </param>
/// <param name="bytesConsumed">
/// For <see cref="OperationStatus.Done"/>, the length of the decoded sequence (1 to 4).
/// For <see cref="OperationStatus.InvalidData"/>, the number of bytes accepted before the
/// offending byte, never less than 1.
/// For <see cref="OperationStatus.NeedMoreData"/>, the length of <paramref name="source"/>.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/> on success, <see cref="OperationStatus.InvalidData"/>
/// for an ill-formed sequence, or <see cref="OperationStatus.NeedMoreData"/> when
/// <paramref name="source"/> ends before the sequence is complete.
/// </returns>
public static OperationStatus DecodeScalarValueFromUtf8(ReadOnlySpan<byte> source, out uint result, out int bytesConsumed)
{
    int index = 0;
    uint value;
    int trailByte;

    // ---- byte 0 ----
    if ((uint)index >= (uint)source.Length)
    {
        goto NeedsMoreData;
    }

    value = source[index];
    if (System.Text.UnicodeUtility.IsAsciiCodePoint(value))
    {
        goto Finish;
    }

    // Only [C2..F4] is accepted as the first byte of a multi-byte sequence.
    if (!System.Text.UnicodeUtility.IsInRangeInclusive(value, 0xC2, 0xF4))
    {
        index = 1; // a rejected first byte still counts as one consumed byte
        goto Invalid;
    }

    value = (value - 0xC2) << 6;

    // ---- byte 1 ----
    index++;
    if ((uint)index >= (uint)source.Length)
    {
        goto NeedsMoreData;
    }

    // Continuation bytes (10xxxxxx) are exactly the bytes that are < -64 when read as sbyte.
    trailByte = (sbyte)source[index];
    if (trailByte >= -64)
    {
        goto Invalid;
    }

    value = (uint)((int)value + trailByte);
    value += 0x80; // undo the sign extension / continuation marker of the trail byte
    value += 0x80; // (0xC2 - 0xC0) << 6: the lead byte was biased by 0xC2 rather than 0xC0

    if (value < 0x800)
    {
        goto Finish; // complete 2-byte sequence
    }

    // value is now ((lead - 0xC0) << 6) + (trail - 0x80). The first two bytes must lie in
    // [E0 A0]..[F4 8F], excluding [ED A0]..[ED BF] and [F0 80]..[F0 8F].
    if (!System.Text.UnicodeUtility.IsInRangeInclusive(value, 0x820, 0xD0F)
        || System.Text.UnicodeUtility.IsInRangeInclusive(value, 0xB60, 0xB7F)
        || System.Text.UnicodeUtility.IsInRangeInclusive(value, 0xC00, 0xC0F))
    {
        goto Invalid;
    }

    // ---- byte 2 ----
    index++;
    if ((uint)index >= (uint)source.Length)
    {
        goto NeedsMoreData;
    }

    trailByte = (sbyte)source[index];
    if (trailByte >= -64)
    {
        goto Invalid;
    }

    value <<= 6;
    value = (uint)((int)value + trailByte);
    value += 0x80;
    value -= 0x20000; // (0xE0 - 0xC0) << 12: strip the 3-byte lead marker

    if (value <= 0xFFFF)
    {
        goto Finish; // complete 3-byte sequence
    }

    // ---- byte 3 ----
    index++;
    if ((uint)index >= (uint)source.Length)
    {
        goto NeedsMoreData;
    }

    trailByte = (sbyte)source[index];
    if (trailByte >= -64)
    {
        goto Invalid;
    }

    value <<= 6;
    value = (uint)((int)value + trailByte);
    value += 0x80;
    value -= 0x400000; // (0xF0 - 0xE0) << 18: strip the 4-byte lead marker
    goto Finish;

Finish:
    bytesConsumed = index + 1;
    result = value;
    return OperationStatus.Done;

Invalid:
    bytesConsumed = index;
    result = 0xFFFD;
    return OperationStatus.InvalidData;

NeedsMoreData:
    bytesConsumed = index;
    result = 0xFFFD;
    return OperationStatus.NeedMoreData;
}
/// <summary>
/// Returns the defined-character bitmap as 32-bit words.
/// </summary>
/// <returns>
/// When <see cref="BitConverter.IsLittleEndian"/> is true, the bytes of
/// <c>DefinedCharsBitmapSpan</c> reinterpreted in place as <see cref="uint"/> values;
/// otherwise the contents of <c>_definedCharacterBitmapBigEndian</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static ReadOnlySpan<uint> GetDefinedCharacterBitmap()
{
    if (!BitConverter.IsLittleEndian)
    {
        // The raw bytes cannot be reinterpreted here; use the converted array.
        return _definedCharacterBitmapBigEndian;
    }

    return MemoryMarshal.Cast<byte, uint>(DefinedCharsBitmapSpan);
}
/// <summary>
/// Converts one or two UTF-16 code units into a scalar value. Non-surrogate input is
/// answered inline; surrogates are handed to <c>GetScalarValueFromUtf16Slow</c>.
/// </summary>
/// <param name="first">The first UTF-16 code unit.</param>
/// <param name="second">The following code unit, or null if there is none.</param>
/// <param name="wasSurrogatePair">False when <paramref name="first"/> is not a surrogate; otherwise set by the slow path.</param>
/// <param name="needsMoreData">False when <paramref name="first"/> is not a surrogate; otherwise set by the slow path.</param>
/// <returns><paramref name="first"/> itself when it is not a surrogate; otherwise the slow path's result.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int GetScalarValueFromUtf16(char first, char? second, out bool wasSurrogatePair, out bool needsMoreData)
{
    if (char.IsSurrogate(first))
    {
        return GetScalarValueFromUtf16Slow(first, second, out wasSurrogatePair, out needsMoreData);
    }

    wasSurrogatePair = false;
    needsMoreData = false;
    return first;
}
/// <summary>
/// Resolves a code unit that may start a surrogate pair.
/// </summary>
/// <param name="first">The first UTF-16 code unit.</param>
/// <param name="second">The following code unit, or null if there is none.</param>
/// <param name="wasSurrogatePair">True only when a high surrogate is followed by a low surrogate.</param>
/// <param name="needMoreData">True only when <paramref name="first"/> is a high surrogate and <paramref name="second"/> has no value.</param>
/// <returns>The combined scalar value for a well-formed pair; otherwise 0xFFFD.</returns>
private static int GetScalarValueFromUtf16Slow(char first, char? second, out bool wasSurrogatePair, out bool needMoreData)
{
    // Start from the "failure" answer and upgrade it only on the paths that earn it.
    bool pairFound = false;
    bool inputExhausted = false;
    int scalar = 0xFFFD;

    if (char.IsHighSurrogate(first))
    {
        if (!second.HasValue)
        {
            inputExhausted = true;
        }
        else if (char.IsLowSurrogate(second.Value))
        {
            pairFound = true;
            scalar = GetScalarValueFromUtf16SurrogatePair(first, second.Value);
        }
    }

    wasSurrogatePair = pairFound;
    needMoreData = inputExhausted;
    return scalar;
}
/// <summary>
/// Reads the scalar value that starts at <paramref name="pChar"/>. Non-surrogate code units
/// are returned inline; surrogates are handed to <c>GetScalarValueFromUtf16Slow</c>.
/// </summary>
/// <param name="pChar">Pointer to the first UTF-16 code unit to read.</param>
/// <param name="endOfString">Forwarded unchanged to the slow path.</param>
/// <returns>The code unit itself when it is not a surrogate; otherwise the slow path's result.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int GetScalarValueFromUtf16(char* pChar, bool endOfString)
{
    char lead = pChar[0];
    return char.IsSurrogate(lead)
        ? GetScalarValueFromUtf16Slow(pChar, endOfString)
        : lead;
}
/// <summary>
/// Pointer-based surrogate handling for <c>GetScalarValueFromUtf16</c>.
/// </summary>
/// <param name="pChar">Pointer to the first UTF-16 code unit.</param>
/// <param name="endOfString">
/// When true, <c>pChar[1]</c> is never read and a leading high surrogate yields 0xFFFD.
/// </param>
/// <returns>
/// The code unit itself if it is not a surrogate; the combined scalar value for a high
/// surrogate followed by a low surrogate; otherwise 0xFFFD.
/// </returns>
private unsafe static int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
{
    char lead = pChar[0];
    if (!char.IsSurrogate(lead))
    {
        return lead;
    }

    // A lone low surrogate, or a high surrogate with nothing after it, cannot form a pair.
    if (!char.IsHighSurrogate(lead) || endOfString)
    {
        return 0xFFFD;
    }

    char trail = pChar[1];
    return char.IsLowSurrogate(trail)
        ? GetScalarValueFromUtf16SurrogatePair(lead, trail)
        : 0xFFFD;
}
/// <summary>
/// Combines the low 10 bits of each surrogate into a single value:
/// <c>(((high &amp; 0x3FF) + 0x40) &lt;&lt; 10) | (low &amp; 0x3FF)</c>.
/// </summary>
/// <param name="highSurrogate">The first code unit of the pair; not validated here.</param>
/// <param name="lowSurrogate">The second code unit of the pair; not validated here.</param>
/// <returns>The combined value.</returns>
private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
{
    // Adding 0x40 before the shift is the same as adding 0x10000 afterwards.
    int upperBits = (highSurrogate & 0x3FF) + 0x40;
    int lowerBits = lowSurrogate & 0x3FF;
    return (upperBits << 10) | lowerBits;
}
/// <summary>
/// Splits a scalar value into a UTF-16 high/low surrogate pair.
/// </summary>
/// <param name="scalar">
/// The value to split; it is not validated.
/// NOTE(review): presumably callers only pass values in 0x10000..0x10FFFF - confirm.
/// </param>
/// <param name="highSurrogate">0xD800 combined with ((scalar &gt;&gt; 16) - 1) &lt;&lt; 6 and bits 10..15 of the scalar.</param>
/// <param name="lowSurrogate">0xDC00 combined with the low 10 bits of the scalar.</param>
internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
{
    int upperHalf = scalar >> 16;      // bits 16 and above
    int lowerHalf = scalar & 0xFFFF;   // bits 0..15

    highSurrogate = (char)(0xD800 | ((upperHalf - 1) << 6) | (lowerHalf >> 10));
    lowSurrogate = (char)(0xDC00 | (lowerHalf & 0x3FF));
}
/// <summary>
/// Encodes <paramref name="scalar"/> as UTF-8 and packs the bytes into an
/// <see cref="int"/>: the first encoded byte occupies bits 0..7, the second bits 8..15,
/// and so on.
/// </summary>
/// <param name="scalar">
/// The value to encode; it is not validated. Values up to 0x7F produce one byte, up to 0x7FF
/// two, up to 0xFFFF three, and anything larger four.
/// </param>
/// <returns>The packed UTF-8 bytes. A four-byte encoding sets the sign bit.</returns>
internal static int GetUtf8RepresentationForScalarValue(uint scalar)
{
    if (scalar <= 0x7F)
    {
        return (byte)scalar;
    }

    // The final byte of every multi-byte form carries the low six bits.
    byte last = (byte)(0x80 | (scalar & 0x3F));

    if (scalar <= 0x7FF)
    {
        byte lead2 = (byte)(0xC0 | (scalar >> 6));
        return lead2 | (last << 8);
    }

    byte secondToLast = (byte)(0x80 | ((scalar >> 6) & 0x3F));

    if (scalar <= 0xFFFF)
    {
        byte lead3 = (byte)(0xE0 | (scalar >> 12));
        return lead3 | (secondToLast << 8) | (last << 16);
    }

    byte lead4 = (byte)(0xF0 | (scalar >> 18));
    byte afterLead = (byte)(0x80 | ((scalar >> 12) & 0x3F));
    return lead4 | (afterLead << 8) | (secondToLast << 16) | (last << 24);
}
/// <summary>
/// Tests the bit for <paramref name="c"/> in the bitmap returned by
/// <c>GetDefinedCharacterBitmap</c>: word <c>c &gt;&gt; 5</c>, bit <c>c &amp; 31</c>.
/// </summary>
/// <param name="c">The UTF-16 code unit to look up.</param>
/// <returns>True if that bit is set; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsCharacterDefined(char c)
{
    ReadOnlySpan<uint> bitmap = GetDefinedCharacterBitmap();
    uint word = bitmap[c >> 5];
    uint mask = 1u << (c & 0x1F);
    return (word & mask) != 0;
}
/// <summary>
/// Reports whether <paramref name="scalar"/> has any bit set above bit 15.
/// </summary>
/// <param name="scalar">The value to test; negative values also return true.</param>
/// <returns>True if any of bits 16..31 is set; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsSupplementaryCodePoint(int scalar)
    => (scalar >> 16) != 0; // the arithmetic shift keeps the sign bit, so negatives stay non-zero
/// <summary>
/// Reports whether <paramref name="value"/>, read as a signed byte, is below -64. That
/// holds exactly for the bytes 0x80..0xBF, i.e. those of the form 10xxxxxx.
/// </summary>
/// <param name="value">
/// The byte to test.
/// NOTE(review): <c>[In] [IsReadOnly] ref</c> looks like the decompiled form of an
/// <c>in</c> parameter - confirm before changing the signature.
/// </param>
/// <returns>True if the byte is in 0x80..0xBF; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsUtf8ContinuationByte([In] [IsReadOnly] ref byte value)
{
    sbyte asSigned = (sbyte)value;
    return asSigned < -0x40;
}
}
}