// UnicodeHelpers
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace System.Text.Unicode
{
internal static class UnicodeHelpers
{
// U+FFFD REPLACEMENT CHARACTER as a char constant.
// NOTE(review): no member visible in this class references this constant; the
// decoding helpers return the numeric value 65533 (0xFFFD) directly.
private const char UNICODE_REPLACEMENT_CHAR = '�';
// 1114111 == 0x10FFFF.
internal const int UNICODE_LAST_CODEPOINT = 1114111;
/// <summary>
/// Machine-endian copy of the defined-character bitmap, built once by the field
/// initializer. It is <c>null</c> when <see cref="BitConverter.IsLittleEndian"/> is
/// true, because in that case the raw byte data is reinterpreted directly and no
/// converted copy is needed.
/// </summary>
/// <remarks>
/// Marked <c>readonly</c>: the reference is assigned only here and never reassigned
/// by any member of this class.
/// </remarks>
private static readonly uint[] _definedCharacterBitmapBigEndian = BitConverter.IsLittleEndian ? null : CreateDefinedCharacterBitmapMachineEndian();
// Exposes 8192 bytes of static data as a read-only byte span (8192 * 8 = 65536 bits).
// IsCharacterDefined consumes this data as 32-bit words, one bit per UTF-16 code unit.
// NOTE(review): the <PrivateImplementationDetails> reference looks like decompiler
// output for a compiler-generated static data blob and is not valid C# as written —
// confirm against the original source before editing this member.
private unsafe static ReadOnlySpan<byte> DefinedCharsBitmapSpan => new ReadOnlySpan<byte>(&global::<PrivateImplementationDetails>.B834DC34C55D9E54905161E0ED4AF27E7F1C49AF, 8192);
/// <summary>
/// Builds a <see cref="uint"/> array from <c>DefinedCharsBitmapSpan</c> by decoding
/// each consecutive group of four bytes as a little-endian 32-bit value.
/// </summary>
/// <returns>An array holding one element per four bytes of the source span.</returns>
private static uint[] CreateDefinedCharacterBitmapMachineEndian()
{
    ReadOnlySpan<byte> bitmapBytes = DefinedCharsBitmapSpan;
    uint[] words = new uint[bitmapBytes.Length / sizeof(uint)];

    // Walk the source with a byte offset instead of repeatedly re-slicing the span.
    int byteOffset = 0;
    for (int wordIndex = 0; wordIndex < words.Length; wordIndex++) {
        words[wordIndex] = BinaryPrimitives.ReadUInt32LittleEndian(bitmapBytes.Slice(byteOffset));
        byteOffset += sizeof(uint);
    }

    return words;
}
/// <summary>
/// Returns the defined-character bitmap as a span of 32-bit words.
/// </summary>
/// <returns>
/// When <see cref="BitConverter.IsLittleEndian"/> is true, the raw byte data
/// reinterpreted as <see cref="uint"/> values; otherwise the pre-converted
/// <c>_definedCharacterBitmapBigEndian</c> array.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static ReadOnlySpan<uint> GetDefinedCharacterBitmap()
{
    if (!BitConverter.IsLittleEndian) {
        // The raw bytes cannot be reinterpreted here; use the converted copy.
        return _definedCharacterBitmapBigEndian;
    }

    return MemoryMarshal.Cast<byte, uint>(DefinedCharsBitmapSpan);
}
/// <summary>
/// Decodes a scalar value from one or two UTF-16 code units.
/// </summary>
/// <param name="first">The first code unit.</param>
/// <param name="second">The following code unit, or <c>null</c> if there is none.</param>
/// <param name="wasSurrogatePair">
/// Always <c>false</c> when <paramref name="first"/> is not a surrogate; otherwise
/// set by the slow path.
/// </param>
/// <returns>
/// <paramref name="first"/> itself when it is not a surrogate; otherwise whatever
/// the slow path returns.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int GetScalarValueFromUtf16(char first, char? second, out bool wasSurrogatePair)
{
    if (char.IsSurrogate(first)) {
        // Surrogates are handled out of line so this fast path stays small.
        return GetScalarValueFromUtf16Slow(first, second, out wasSurrogatePair);
    }

    wasSurrogatePair = false;
    return first;
}
/// <summary>
/// Slow path of <c>GetScalarValueFromUtf16(char, char?, out bool)</c>.
/// </summary>
/// <param name="first">The first code unit.</param>
/// <param name="second">The following code unit, or <c>null</c> if there is none.</param>
/// <param name="wasSurrogatePair">
/// <c>true</c> only when <paramref name="first"/> is a high surrogate and
/// <paramref name="second"/> is a low surrogate.
/// </param>
/// <returns>
/// The combined scalar value for a well-formed pair; 0xFFFD (65533) in every
/// other case.
/// </returns>
private static int GetScalarValueFromUtf16Slow(char first, char? second, out bool wasSurrogatePair)
{
    bool isWellFormedPair = char.IsHighSurrogate(first)
        && second.HasValue
        && char.IsLowSurrogate(second.Value);

    if (isWellFormedPair) {
        wasSurrogatePair = true;
        return GetScalarValueFromUtf16SurrogatePair(first, second.Value);
    }

    // Anything other than high + low yields the same fallback value.
    wasSurrogatePair = false;
    return 0xFFFD;
}
/// <summary>
/// Decodes a scalar value starting at the UTF-16 code unit that
/// <paramref name="pChar"/> points to.
/// </summary>
/// <param name="pChar">Pointer to the first code unit; it is always dereferenced.</param>
/// <param name="endOfString">Forwarded unchanged to the slow path.</param>
/// <returns>
/// The code unit itself when it is not a surrogate; otherwise whatever the slow
/// path returns.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int GetScalarValueFromUtf16(char* pChar, bool endOfString)
{
    char codeUnit = pChar[0];
    if (char.IsSurrogate(codeUnit)) {
        return GetScalarValueFromUtf16Slow(pChar, endOfString);
    }

    return codeUnit;
}
/// <summary>
/// Slow path of <c>GetScalarValueFromUtf16(char*, bool)</c>.
/// </summary>
/// <param name="pChar">Pointer to the first code unit.</param>
/// <param name="endOfString">
/// When <c>true</c>, the element after <paramref name="pChar"/> is never read.
/// </param>
/// <returns>
/// The code unit itself when it is not a surrogate; the combined scalar value when
/// a high surrogate is followed by a low surrogate; 0xFFFD (65533) otherwise.
/// </returns>
private unsafe static int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
{
    const int Replacement = 0xFFFD;

    char lead = pChar[0];
    if (!char.IsSurrogate(lead)) {
        return lead;
    }

    // A lone low surrogate, or a high surrogate with nothing after it, cannot
    // be decoded. pChar[1] must not be touched in either case.
    if (!char.IsHighSurrogate(lead) || endOfString) {
        return Replacement;
    }

    char trail = pChar[1];
    if (!char.IsLowSurrogate(trail)) {
        return Replacement;
    }

    return GetScalarValueFromUtf16SurrogatePair(lead, trail);
}
/// <summary>
/// Combines the low 10 bits of each argument into a single value:
/// <c>0x10000 + (high10 &lt;&lt; 10) + low10</c>.
/// </summary>
/// <param name="highSurrogate">Code unit supplying the upper 10 payload bits.</param>
/// <param name="lowSurrogate">Code unit supplying the lower 10 payload bits.</param>
/// <returns>The combined value. No validation of the arguments is performed.</returns>
private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
{
    int highPayload = highSurrogate & 0x3FF;
    int lowPayload = lowSurrogate & 0x3FF;

    // The three terms occupy disjoint bit ranges, so addition equals bitwise OR.
    return 0x10000 + (highPayload << 10) + lowPayload;
}
/// <summary>
/// Splits a scalar value into a UTF-16 high/low surrogate pair.
/// </summary>
/// <param name="scalar">
/// The value to split. No range check is performed; the method name indicates
/// callers are expected to pass a value above 0xFFFF.
/// </param>
/// <param name="highSurrogate">
/// Receives <c>0xD800 | (((scalar &gt;&gt; 16) - 1) &lt;&lt; 6) | ((scalar &amp; 0xFFFF) &gt;&gt; 10)</c>.
/// </param>
/// <param name="lowSurrogate">Receives <c>0xDC00 | (scalar &amp; 0x3FF)</c>.</param>
internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
{
    int lower16 = scalar & 0xFFFF;
    int upperMinusOne = (scalar >> 16) - 1;

    lowSurrogate = (char)(0xDC00 | (lower16 & 0x3FF));
    highSurrogate = (char)(0xD800 | (upperMinusOne << 6) | (lower16 >> 10));
}
/// <summary>
/// Encodes <paramref name="scalar"/> as a one- to four-byte UTF-8 sequence packed
/// into a single <see cref="int"/>.
/// </summary>
/// <param name="scalar">The value to encode. No range validation is performed.</param>
/// <returns>
/// The encoded bytes, with the first byte of the sequence in bits 0-7, the second
/// in bits 8-15, and so on. Unused upper bytes are zero.
/// </returns>
internal static int GetUtf8RepresentationForScalarValue(uint scalar)
{
    // 1 byte: 0xxxxxxx
    if (scalar <= 0x7F) {
        return (byte)scalar;
    }

    // Every multi-byte form ends with a continuation byte carrying the low 6 bits.
    byte lastByte = (byte)(0x80 | (scalar & 0x3F));

    // 2 bytes: 110xxxxx 10xxxxxx
    if (scalar <= 0x7FF) {
        byte lead2 = (byte)(0xC0 | (scalar >> 6));
        return lead2 | (lastByte << 8);
    }

    byte bits6To11 = (byte)(0x80 | ((scalar >> 6) & 0x3F));

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (scalar <= 0xFFFF) {
        byte lead3 = (byte)(0xE0 | (scalar >> 12));
        return lead3 | (bits6To11 << 8) | (lastByte << 16);
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    byte bits12To17 = (byte)(0x80 | ((scalar >> 12) & 0x3F));
    byte lead4 = (byte)(0xF0 | (scalar >> 18));
    return lead4 | (bits12To17 << 8) | (bits6To11 << 16) | (lastByte << 24);
}
/// <summary>
/// Tests the bit for <paramref name="c"/> in the defined-character bitmap.
/// </summary>
/// <param name="c">The UTF-16 code unit to look up.</param>
/// <returns>
/// <c>true</c> when bit <c>c &amp; 31</c> of word <c>c &gt;&gt; 5</c> in the bitmap is set.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsCharacterDefined(char c)
{
    // Upper 11 bits select the 32-bit word; lower 5 bits select the bit within it.
    uint word = GetDefinedCharacterBitmap()[c >> 5];
    uint bitMask = 1u << (c & 0x1F);
    return (word & bitMask) != 0;
}
/// <summary>
/// Reports whether <paramref name="scalar"/> has any bit set above the low 16 bits.
/// </summary>
/// <param name="scalar">The value to test.</param>
/// <returns>
/// <c>false</c> for values in the range 0..0xFFFF; <c>true</c> for everything else,
/// including negative values.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsSupplementaryCodePoint(int scalar)
{
    // The arithmetic shift leaves zero exactly when the upper 16 bits are all clear.
    int upperBits = scalar >> 16;
    return upperBits != 0;
}
}
}