// TextEncoder
// The base class of web encoders.
using System.Buffers;
using System.ComponentModel;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text.Unicode;
namespace System.Text.Encodings.Web
{
public abstract class TextEncoder
{
/// <summary>
/// Inline table of 128 flags, one per ASCII code point. It is populated by
/// <see cref="InitializeAsciiCache"/> and read by <see cref="DoesAsciiNeedEncoding"/>.
/// </summary>
private struct AsciiNeedsEscapingData
{
public unsafe fixed bool Data[128];
}
// Per-ASCII-byte cache of UTF-8 escape sequences used by EncodeUtf8:
// null = not computed yet, s_noEscape = byte is copied unchanged, anything else = the escaped bytes.
private readonly byte[][] _asciiEscape = new byte[128][];
// Set to true at the end of InitializeAsciiCache, once _asciiNeedsEscaping has been filled.
private volatile bool _isAsciiCacheInitialized;
// "Does this ASCII code point need escaping" flags; only consulted after _isAsciiCacheInitialized is true.
private AsciiNeedsEscapingData _asciiNeedsEscaping;
// Sentinel stored in _asciiEscape (compared by reference) meaning "this byte needs no escaping".
private static readonly byte[] s_noEscape = Array.Empty<byte>();
/// <summary>
/// Upper bound on the number of output characters produced for one input character.
/// This class multiplies it by the input length to size the output buffer in
/// <c>Encode(string)</c> and uses it as the scratch-buffer size in <c>EncodeCore</c>.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
public abstract int MaxOutputCharactersPerInputCharacter { get; }
/// <summary>
/// Writes the encoded form of <paramref name="unicodeScalar"/> into <paramref name="buffer"/>.
/// </summary>
/// <param name="unicodeScalar">The scalar value to encode.</param>
/// <param name="buffer">Destination for the encoded characters.</param>
/// <param name="bufferLength">Number of characters available in <paramref name="buffer"/>.</param>
/// <param name="numberOfCharactersWritten">Number of characters written to <paramref name="buffer"/>.</param>
/// <returns>
/// <c>false</c> when the encoded form could not be written; callers in this class treat that as
/// "destination too small" or as a misreported <see cref="MaxOutputCharactersPerInputCharacter"/>.
/// </returns>
[CLSCompliant(false)]
[EditorBrowsable(EditorBrowsableState.Never)]
public unsafe abstract bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten);
/// <summary>
/// Finds the index of the first character in <paramref name="text"/> that must be encoded.
/// </summary>
/// <param name="text">Pointer to the first character to examine.</param>
/// <param name="textLength">Number of characters to examine.</param>
/// <returns>The index of the first character to encode; callers in this class treat -1 as "nothing needs encoding".</returns>
[CLSCompliant(false)]
[EditorBrowsable(EditorBrowsableState.Never)]
public unsafe abstract int FindFirstCharacterToEncode(char* text, int textLength);
/// <summary>
/// Reports whether <paramref name="unicodeScalar"/> is escaped by this encoder. When it returns
/// <c>false</c>, the encode loops in this class copy the character to the output unchanged.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
public abstract bool WillEncode(int unicodeScalar);
/// <summary>
/// Encodes <paramref name="value"/> and returns the result as a new string, or returns
/// <paramref name="value"/> itself when no character needs encoding.
/// </summary>
/// <param name="value">The string to encode.</param>
/// <returns>The encoded string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The output did not fit in <see cref="MaxOutputCharactersPerInputCharacter"/> × input length characters.
/// </exception>
[System.Runtime.CompilerServices.NullableContext(1)]
public unsafe virtual string Encode(string value)
{
    if (value == null)
        throw new ArgumentNullException(nameof(value));

    fixed (char* input = value) {
        int firstToEncode = FindFirstCharacterToEncode(input, value.Length);
        if (firstToEncode == -1)
            return value;

        int capacity = MaxOutputCharactersPerInputCharacter * value.Length;
        OperationStatus status;
        int written;

        // Small outputs are built on the stack; anything of 1024 chars or more goes to the heap.
        if (capacity < 1024) {
            char* stackBuffer = stackalloc char[capacity];
            status = EncodeIntoBuffer(stackBuffer, capacity, input, value.Length, out _, out written, firstToEncode, true);
            if (status != OperationStatus.Done)
                ThrowArgumentException_MaxOutputCharsPerInputChar();
            return new string(stackBuffer, 0, written);
        }

        char[] heapBuffer = new char[capacity];
        fixed (char* pinnedHeap = &heapBuffer[0]) {
            status = EncodeIntoBuffer(pinnedHeap, capacity, input, value.Length, out _, out written, firstToEncode, true);
            if (status != OperationStatus.Done)
                ThrowArgumentException_MaxOutputCharsPerInputChar();
            return new string(heapBuffer, 0, written);
        }
    }
}
/// <summary>
/// Encodes <paramref name="value"/> into <paramref name="buffer"/>. The first
/// <paramref name="firstCharacterToEncode"/> characters are copied verbatim; every later character
/// is either copied or replaced by its encoded form.
/// </summary>
/// <param name="buffer">Destination buffer.</param>
/// <param name="bufferLength">Capacity of <paramref name="buffer"/> in characters.</param>
/// <param name="value">Source characters.</param>
/// <param name="valueLength">Number of source characters.</param>
/// <param name="charsConsumed">Number of source characters fully processed when the method returns.</param>
/// <param name="charsWritten">Number of characters written to <paramref name="buffer"/>.</param>
/// <param name="firstCharacterToEncode">Index of the first source character that needs encoding.</param>
/// <param name="isFinalBlock">
/// When <c>false</c>, a trailing character that needs a following character to be decoded
/// yields <see cref="OperationStatus.NeedMoreData"/> instead of being encoded.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/>, <see cref="OperationStatus.DestinationTooSmall"/> or
/// <see cref="OperationStatus.NeedMoreData"/>.
/// </returns>
private unsafe OperationStatus EncodeIntoBuffer(char* buffer, int bufferLength, char* value, int valueLength, out int charsConsumed, out int charsWritten, int firstCharacterToEncode, bool isFinalBlock = true)
{
    charsWritten = 0;
    if (firstCharacterToEncode > 0) {
        // Buffer.MemoryCopy throws when the source is larger than the destination; report the
        // shortfall through the status instead, as the rest of this method does.
        if (firstCharacterToEncode > bufferLength) {
            charsConsumed = 0;
            return OperationStatus.DestinationTooSmall;
        }
        // Byte counts are computed as long so large buffers cannot overflow int.
        Buffer.MemoryCopy(value, buffer, sizeof(char) * (long)bufferLength, sizeof(char) * (long)firstCharacterToEncode);
        charsWritten += firstCharacterToEncode;
        bufferLength -= firstCharacterToEncode;
        buffer += firstCharacterToEncode;
    }

    char current = value[firstCharacterToEncode];
    char next = current;
    bool wasSurrogatePair = false;
    int i;
    // Each iteration handles the character at index i - 1, using value[i] as look-ahead for surrogate pairs.
    for (i = firstCharacterToEncode + 1; i < valueLength; i++) {
        current = wasSurrogatePair ? value[i - 1] : next;
        next = value[i];
        if (!WillEncode(current)) {
            wasSurrogatePair = false;
            if (bufferLength < 1) {
                // No room for the verbatim character; everything before index i - 1 was processed.
                charsConsumed = i - 1;
                return OperationStatus.DestinationTooSmall;
            }
            *buffer = current;
            buffer++;
            bufferLength--;
            charsWritten++;
        } else {
            bool needsMoreData;
            int scalar = UnicodeHelpers.GetScalarValueFromUtf16(current, next, out wasSurrogatePair, out needsMoreData);
            if (!TryEncodeUnicodeScalar(scalar, buffer, bufferLength, out int written)) {
                // Report source progress (index of the character that did not fit), not a destination offset.
                charsConsumed = i - 1;
                return OperationStatus.DestinationTooSmall;
            }
            if (wasSurrogatePair)
                i++; // the look-ahead character was consumed as the low surrogate
            buffer += written;
            bufferLength -= written;
            charsWritten += written;
        }
    }

    // i == valueLength means the last character has not been handled yet (it only served as
    // look-ahead); i == valueLength + 1 means it was consumed as part of a surrogate pair.
    if (i == valueLength) {
        current = value[valueLength - 1];
        bool needsMoreDataAtEnd;
        int lastScalar = UnicodeHelpers.GetScalarValueFromUtf16(current, null, out wasSurrogatePair, out needsMoreDataAtEnd);
        if (!isFinalBlock && needsMoreDataAtEnd) {
            charsConsumed = valueLength - 1;
            return OperationStatus.NeedMoreData;
        }
        if (!TryEncodeUnicodeScalar(lastScalar, buffer, bufferLength, out int lastWritten)) {
            charsConsumed = valueLength - 1;
            return OperationStatus.DestinationTooSmall;
        }
        buffer += lastWritten;
        bufferLength -= lastWritten;
        charsWritten += lastWritten;
    }

    charsConsumed = valueLength;
    return OperationStatus.Done;
}
/// <summary>
/// Encodes the whole of <paramref name="value"/> and writes the result to <paramref name="output"/>.
/// </summary>
/// <param name="output">The writer that receives the encoded text.</param>
/// <param name="value">The string to encode.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="value"/> or <paramref name="output"/> is null.
/// </exception>
[System.Runtime.CompilerServices.NullableContext(1)]
public void Encode(TextWriter output, string value)
{
    // Check here so a null value surfaces as ArgumentNullException rather than a
    // NullReferenceException from value.Length below.
    if (value == null)
        throw new ArgumentNullException(nameof(value));
    Encode(output, value, 0, value.Length);
}
/// <summary>
/// Encodes <paramref name="characterCount"/> characters of <paramref name="value"/>, starting at
/// <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
/// </summary>
/// <param name="output">The writer that receives the encoded text.</param>
/// <param name="value">The string containing the characters to encode.</param>
/// <param name="startIndex">Index of the first character to encode.</param>
/// <param name="characterCount">Number of characters to encode.</param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> or <paramref name="output"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The requested range lies outside <paramref name="value"/>.</exception>
[System.Runtime.CompilerServices.NullableContext(1)]
public unsafe virtual void Encode(TextWriter output, string value, int startIndex, int characterCount)
{
    if (value == null)
        throw new ArgumentNullException(nameof(value));
    if (output == null)
        throw new ArgumentNullException(nameof(output));
    ValidateRanges(startIndex, characterCount, value.Length);

    fixed (char* pinned = value) {
        char* cursor = pinned + startIndex;
        int firstToEncode = FindFirstCharacterToEncode(cursor, characterCount);
        bool nothingToEncode = firstToEncode == -1;

        // Whole string, nothing to escape: hand the original instance to the writer.
        if (nothingToEncode && startIndex == 0 && characterCount == value.Length) {
            output.Write(value);
            return;
        }

        // Emit the run that needs no encoding one character at a time.
        int verbatimCount = nothingToEncode ? characterCount : firstToEncode;
        for (int remaining = verbatimCount; remaining > 0; remaining--) {
            output.Write(*cursor);
            cursor++;
        }

        if (!nothingToEncode)
            EncodeCore(output, cursor, characterCount - firstToEncode);
    }
}
/// <summary>
/// Encodes <paramref name="characterCount"/> characters of the array <paramref name="value"/>,
/// starting at <paramref name="startIndex"/>, and writes the result to <paramref name="output"/>.
/// </summary>
/// <param name="output">The writer that receives the encoded text.</param>
/// <param name="value">The array containing the characters to encode.</param>
/// <param name="startIndex">Index of the first character to encode.</param>
/// <param name="characterCount">Number of characters to encode.</param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> or <paramref name="output"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The requested range lies outside <paramref name="value"/>.</exception>
[System.Runtime.CompilerServices.NullableContext(1)]
public unsafe virtual void Encode(TextWriter output, char[] value, int startIndex, int characterCount)
{
    if (value == null)
        throw new ArgumentNullException(nameof(value));
    if (output == null)
        throw new ArgumentNullException(nameof(output));
    ValidateRanges(startIndex, characterCount, value.Length);

    fixed (char* pinned = value) {
        char* cursor = pinned + startIndex;
        int firstToEncode = FindFirstCharacterToEncode(cursor, characterCount);
        bool nothingToEncode = firstToEncode == -1;

        // Whole array, nothing to escape: write the array in a single call.
        if (nothingToEncode && startIndex == 0 && characterCount == value.Length) {
            output.Write(value);
            return;
        }

        // Emit the run that needs no encoding one character at a time.
        int verbatimCount = nothingToEncode ? characterCount : firstToEncode;
        for (int remaining = verbatimCount; remaining > 0; remaining--) {
            output.Write(*cursor);
            cursor++;
        }

        if (!nothingToEncode)
            EncodeCore(output, cursor, characterCount - firstToEncode);
    }
}
/// <summary>
/// Encodes UTF-8 text from <paramref name="utf8Source"/> into <paramref name="utf8Destination"/>.
/// Bytes that need no escaping are copied through in runs; every other scalar is replaced by the
/// UTF-8 transcoding of what <see cref="TryEncodeUnicodeScalar"/> produces.
/// </summary>
/// <param name="utf8Source">The UTF-8 bytes to encode.</param>
/// <param name="utf8Destination">The buffer that receives the encoded UTF-8 bytes.</param>
/// <param name="bytesConsumed">Number of bytes read from <paramref name="utf8Source"/>.</param>
/// <param name="bytesWritten">Number of bytes written to <paramref name="utf8Destination"/>.</param>
/// <param name="isFinalBlock">
/// When <c>false</c>, a <see cref="OperationStatus.NeedMoreData"/> result from decoding is returned
/// to the caller; when <c>true</c>, the decoded value is encoded like any other scalar.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/>, <see cref="OperationStatus.DestinationTooSmall"/>,
/// <see cref="OperationStatus.NeedMoreData"/>, or <see cref="OperationStatus.InvalidData"/> when
/// <see cref="TryEncodeUnicodeScalar"/> fails with the 24-character scratch buffer.
/// </returns>
public unsafe virtual OperationStatus EncodeUtf8(ReadOnlySpan<byte> utf8Source, Span<byte> utf8Destination, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true)
{
// Original lengths; progress is reported as (original length - remaining length).
int length = utf8Source.Length;
int length2 = utf8Destination.Length;
// Scratch for the UTF-16 escape produced by TryEncodeUnicodeScalar ...
char* ptr = stackalloc char[24];
// ... and for that escape transcoded to UTF-8.
byte* ptr2 = stackalloc byte[72];
// Length in bytes of the source scalar that is about to be escaped.
int bytesConsumed2 = 0;
// Number of leading bytes of utf8Source already found to need no escaping but not copied yet.
int num = 0;
OperationStatus operationStatus = OperationStatus.Done;
while (!utf8Source.IsEmpty) {
uint result;
ReadOnlySpan<byte> readOnlySpan;
// Inner scan: extend the pass-through run until a scalar needs escaping, decoding fails,
// or the destination cannot hold the run.
do {
result = utf8Source[num];
if (System.Text.UnicodeUtility.IsAsciiCodePoint(result)) {
byte[] asciiEncoding = GetAsciiEncoding((byte)result);
if (asciiEncoding == s_noEscape) {
// ASCII byte that passes through; make sure the run still fits in the destination.
if (++num > utf8Destination.Length) {
num--;
operationStatus = OperationStatus.DestinationTooSmall;
break;
}
} else {
if (asciiEncoding == null) {
// Needs escaping but no cached escape yet: leave the loop so the switch below
// computes and caches it. An ASCII scalar is one byte long.
operationStatus = OperationStatus.Done;
bytesConsumed2 = 1;
break;
}
// Cached escape available: flush the pending pass-through run, then copy the escape.
if (num > 0) {
readOnlySpan = utf8Source.Slice(0, num);
readOnlySpan.CopyTo(utf8Destination);
utf8Source = utf8Source.Slice(num);
utf8Destination = utf8Destination.Slice(num);
num = 0;
}
readOnlySpan = asciiEncoding;
if (!readOnlySpan.TryCopyTo(utf8Destination)) {
operationStatus = OperationStatus.DestinationTooSmall;
break;
}
utf8Destination = utf8Destination.Slice(asciiEncoding.Length);
utf8Source = utf8Source.Slice(1);
}
} else {
// Non-ASCII lead byte: decode one scalar starting at the end of the pending run.
operationStatus = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Source.Slice(num), out result, out bytesConsumed2);
// Any status other than Done (0) is handled by the switch below.
if (operationStatus != 0)
break;
if (WillEncode((int)result))
break;
// Scalar passes through: add its bytes to the run if the destination can hold them.
num += bytesConsumed2;
if (num > utf8Destination.Length) {
num -= bytesConsumed2;
operationStatus = OperationStatus.DestinationTooSmall;
break;
}
}
} while (num < utf8Source.Length);
// Flush whatever pass-through run has accumulated.
if (num > 0) {
readOnlySpan = utf8Source.Slice(0, num);
readOnlySpan.CopyTo(utf8Destination);
utf8Source = utf8Source.Slice(num);
utf8Destination = utf8Destination.Slice(num);
num = 0;
}
if (utf8Source.IsEmpty)
break;
// Number of escape bytes written for the current scalar.
int bytes;
switch (operationStatus) {
case OperationStatus.NeedMoreData:
if (!isFinalBlock) {
bytesConsumed = length - utf8Source.Length;
bytesWritten = length2 - utf8Destination.Length;
return OperationStatus.NeedMoreData;
}
// Final block: fall through and encode whatever value the decoder reported.
goto default;
default: {
// Escape the scalar as UTF-16, then transcode the escape to UTF-8.
if (!TryEncodeUnicodeScalar((int)result, ptr, 24, out int numberOfCharactersWritten)) {
bytesConsumed = length - utf8Source.Length;
bytesWritten = length2 - utf8Destination.Length;
return OperationStatus.InvalidData;
}
bytes = Encoding.UTF8.GetBytes(ptr, numberOfCharactersWritten, ptr2, 72);
ReadOnlySpan<byte> readOnlySpan2 = new ReadOnlySpan<byte>(ptr2, bytes);
// Remember escapes of ASCII scalars so later occurrences take the cached path above.
if (System.Text.UnicodeUtility.IsAsciiCodePoint(result))
_asciiEscape[result] = readOnlySpan2.ToArray();
if (readOnlySpan2.TryCopyTo(utf8Destination))
break;
goto case OperationStatus.DestinationTooSmall;
}
case OperationStatus.DestinationTooSmall:
bytesConsumed = length - utf8Source.Length;
bytesWritten = length2 - utf8Destination.Length;
return OperationStatus.DestinationTooSmall;
}
// Advance past the escape just written and the source bytes it replaced.
utf8Destination = utf8Destination.Slice(bytes);
utf8Source = utf8Source.Slice(bytesConsumed2);
}
bytesConsumed = length;
bytesWritten = length2 - utf8Destination.Length;
return OperationStatus.Done;
}
/// <summary>
/// Encodes <paramref name="source"/> into <paramref name="destination"/>.
/// </summary>
/// <param name="source">The characters to encode.</param>
/// <param name="destination">The buffer that receives the encoded characters.</param>
/// <param name="charsConsumed">Number of characters read from <paramref name="source"/>.</param>
/// <param name="charsWritten">Number of characters written to <paramref name="destination"/>.</param>
/// <param name="isFinalBlock">Passed through to the encode loop; see <c>EncodeIntoBuffer</c>.</param>
/// <returns>The status reported by the encode loop, or
/// <see cref="OperationStatus.DestinationTooSmall"/> when nothing could be written.</returns>
public unsafe virtual OperationStatus Encode(ReadOnlySpan<char> source, Span<char> destination, out int charsConsumed, out int charsWritten, bool isFinalBlock = true)
{
    fixed (char* sourcePtr = &source.GetPinnableReference()) {
        // -1 means "nothing to encode"; an empty source is never scanned.
        int firstToEncode = -1;
        if (!source.IsEmpty)
            firstToEncode = FindFirstCharacterToEncode(sourcePtr, source.Length);

        if (firstToEncode != -1 && !destination.IsEmpty) {
            fixed (char* destinationPtr = &destination.GetPinnableReference()) {
                return EncodeIntoBuffer(destinationPtr, destination.Length, sourcePtr, source.Length, out charsConsumed, out charsWritten, firstToEncode, isFinalBlock);
            }
        }

        // Nothing to encode: the output is a plain copy of the input, if it fits.
        if (firstToEncode == -1 && source.TryCopyTo(destination)) {
            charsConsumed = source.Length;
            charsWritten = source.Length;
            return OperationStatus.Done;
        }

        charsConsumed = 0;
        charsWritten = 0;
        return OperationStatus.DestinationTooSmall;
    }
}
/// <summary>
/// Writes the encoded form of <paramref name="valueLength"/> characters starting at
/// <paramref name="value"/> to <paramref name="output"/>. The last character is always passed
/// to <see cref="TryEncodeUnicodeScalar"/>, unless it was consumed as part of a surrogate pair.
/// </summary>
/// <exception cref="ArgumentException">
/// An encoded scalar did not fit in <see cref="MaxOutputCharactersPerInputCharacter"/> characters.
/// </exception>
private unsafe void EncodeCore(TextWriter output, char* value, int valueLength)
{
    int scratchLength = MaxOutputCharactersPerInputCharacter;
    char* scratch = stackalloc char[scratchLength];

    char current = *value;
    char lookAhead = current;
    bool wasSurrogatePair = false;
    bool needsMoreData;
    int written;

    // Each pass handles the character before `index`, with value[index] as look-ahead.
    int index = 1;
    while (index < valueLength) {
        current = wasSurrogatePair ? value[index - 1] : lookAhead;
        lookAhead = value[index];

        if (WillEncode(current)) {
            int scalar = UnicodeHelpers.GetScalarValueFromUtf16(current, lookAhead, out wasSurrogatePair, out needsMoreData);
            if (!TryEncodeUnicodeScalar(scalar, scratch, scratchLength, out written))
                ThrowArgumentException_MaxOutputCharsPerInputChar();
            Write(output, scratch, written);
            if (wasSurrogatePair)
                index++; // the look-ahead was the low surrogate; skip it
        } else {
            wasSurrogatePair = false;
            output.Write(current);
        }

        index++;
    }

    // The final character has only been seen as look-ahead unless a surrogate pair swallowed it.
    if (index == valueLength || !wasSurrogatePair) {
        current = value[valueLength - 1];
        int lastScalar = UnicodeHelpers.GetScalarValueFromUtf16(current, null, out wasSurrogatePair, out needsMoreData);
        if (!TryEncodeUnicodeScalar(lastScalar, scratch, scratchLength, out written))
            ThrowArgumentException_MaxOutputCharsPerInputChar();
        Write(output, scratch, written);
    }
}
/// <summary>
/// Span convenience overload: pins <paramref name="text"/> and forwards to the pointer-based
/// <c>FindFirstCharacterToEncode</c>.
/// </summary>
private unsafe int FindFirstCharacterToEncode(ReadOnlySpan<char> text)
{
    int length = text.Length;
    fixed (char* pinnedText = &MemoryMarshal.GetReference(text)) {
        return FindFirstCharacterToEncode(pinnedText, length);
    }
}
/// <summary>
/// Finds the byte offset of the first scalar in <paramref name="utf8Text"/> that must be encoded
/// or that fails to decode as UTF-8.
/// </summary>
/// <param name="utf8Text">The UTF-8 bytes to scan.</param>
/// <returns>The byte offset of that scalar, or -1 when the whole input can pass through unchanged.</returns>
[EditorBrowsable(EditorBrowsableState.Never)]
public unsafe virtual int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    if (!_isAsciiCacheInitialized)
        InitializeAsciiCache();

    fixed (byte* bytes = &utf8Text.GetPinnableReference()) {
        int offset = 0;
        while (offset < utf8Text.Length) {
            if (System.Text.UnicodeUtility.IsAsciiCodePoint(bytes[offset])) {
                // ASCII: answered from the precomputed flag table.
                if (DoesAsciiNeedEncoding(bytes[offset]))
                    return offset;
                offset++;
                continue;
            }

            // Non-ASCII: decode one scalar and ask the encoder about it.
            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(offset), out uint result, out int bytesConsumed);
            if (status != OperationStatus.Done || WillEncode((int)result))
                return offset;
            offset += bytesConsumed;
        }
        return -1;
    }
}
/// <summary>
/// Copies every character of <paramref name="source"/> to <paramref name="destination"/>
/// when the destination is large enough.
/// </summary>
/// <param name="source">The characters to copy.</param>
/// <param name="destination">Pointer to the destination buffer.</param>
/// <param name="destinationLength">Capacity of <paramref name="destination"/> in characters.</param>
/// <param name="numberOfCharactersWritten">Number of characters copied; 0 when the copy is refused.</param>
/// <returns><c>true</c> when all characters were copied; <c>false</c> when the destination is too small.</returns>
internal unsafe static bool TryCopyCharacters(char[] source, char* destination, int destinationLength, out int numberOfCharactersWritten)
{
    int count = source.Length;
    if (count > destinationLength) {
        numberOfCharactersWritten = 0;
        return false;
    }

    char* cursor = destination;
    foreach (char ch in source) {
        *cursor = ch;
        cursor++;
    }

    numberOfCharactersWritten = count;
    return true;
}
/// <summary>
/// Writes <paramref name="unicodeScalar"/>, truncated to a single UTF-16 code unit, to
/// <paramref name="destination"/> when at least one character of space is available.
/// </summary>
/// <param name="unicodeScalar">The value to write; it is cast to <see cref="char"/>.</param>
/// <param name="destination">Pointer to the destination buffer.</param>
/// <param name="destinationLength">Capacity of <paramref name="destination"/> in characters.</param>
/// <param name="numberOfCharactersWritten">1 on success, 0 otherwise.</param>
/// <returns><c>true</c> when the character was written.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static bool TryWriteScalarAsChar(int unicodeScalar, char* destination, int destinationLength, out int numberOfCharactersWritten)
{
    bool hasRoom = destinationLength >= 1;
    if (hasRoom) {
        destination[0] = (char)unicodeScalar;
    }
    numberOfCharactersWritten = hasRoom ? 1 : 0;
    return hasRoom;
}
/// <summary>
/// Verifies that the range [<paramref name="startIndex"/>, <paramref name="startIndex"/> +
/// <paramref name="characterCount"/>) lies within an input of <paramref name="actualInputLength"/> characters.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="characterCount"/> is outside the input.
/// </exception>
private static void ValidateRanges(int startIndex, int characterCount, int actualInputLength)
{
    bool startInRange = startIndex >= 0 && startIndex <= actualInputLength;
    if (!startInRange) {
        throw new ArgumentOutOfRangeException(nameof(startIndex));
    }

    int available = actualInputLength - startIndex;
    bool countInRange = characterCount >= 0 && characterCount <= available;
    if (!countInRange) {
        throw new ArgumentOutOfRangeException(nameof(characterCount));
    }
}
/// <summary>
/// Writes <paramref name="inputLength"/> characters starting at <paramref name="input"/> to
/// <paramref name="output"/>, one character per call. Writes nothing when the length is not positive.
/// </summary>
private unsafe static void Write(TextWriter output, char* input, int inputLength)
{
    for (int offset = 0; offset < inputLength; offset++) {
        output.Write(input[offset]);
    }
}
/// <summary>
/// Looks up the cached UTF-8 escape for the ASCII byte <paramref name="value"/>.
/// </summary>
/// <returns>
/// The cached escape bytes; <c>s_noEscape</c> when the byte is not encoded (the answer is cached
/// on first use); or <c>null</c> when the byte is encoded but no escape has been cached yet.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private byte[] GetAsciiEncoding(byte value)
{
    byte[] cached = _asciiEscape[value];
    if (cached != null) {
        return cached;
    }
    if (WillEncode(value)) {
        // Needs escaping, but the escape bytes have not been computed yet.
        return null;
    }
    _asciiEscape[value] = s_noEscape;
    return s_noEscape;
}
/// <summary>
/// Fills the ASCII flag table with the result of <see cref="WillEncode"/> for each of the 128
/// ASCII code points, then marks the table as initialized.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private unsafe void InitializeAsciiCache()
{
    for (int i = 0; i < 128; i++) {
        // Index the fixed-size buffer directly; no reinterpretation of bool storage as sbyte.
        _asciiNeedsEscaping.Data[i] = WillEncode(i);
    }
    // Volatile write, performed only after every entry has been stored.
    _isAsciiCacheInitialized = true;
}
/// <summary>
/// Reads the precomputed flag for an ASCII code point from the 128-entry table.
/// </summary>
/// <param name="value">
/// The code point to look up. The table has 128 entries and no bounds check is performed, so
/// callers must pass a value below 128.
/// </param>
/// <returns><c>true</c> when the table marks the code point as needing encoding.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe bool DoesAsciiNeedEncoding(uint value)
{
    return _asciiNeedsEscaping.Data[value];
}
/// <summary>
/// Throw helper used when an encoded scalar does not fit in the space implied by
/// <see cref="MaxOutputCharactersPerInputCharacter"/>.
/// </summary>
/// <exception cref="ArgumentException">Always thrown.</exception>
private static void ThrowArgumentException_MaxOutputCharsPerInputChar()
    => throw new ArgumentException(System.SR.TextEncoderDoesNotImplementMaxOutputCharsPerInputChar);
}
}