From f27d33729518f5aa478aa818b7b4f54a4d50bef1 Mon Sep 17 00:00:00 2001
From: Levi Broderick <levib@microsoft.com>
Date: Tue, 16 Feb 2021 21:12:02 +0000
Subject: [PATCH] Tighten bounds checks around TextEncoder logic

- Replaces unsafe code with safe code where possible
- Fixes some surrogate pairs being misinterpreted
- Fixes https://github.com/dotnet/runtime/issues/45994
- Ref: MSRC 62749 (CVE-2021-26701)
---
 NuGet.config                                  |   2 +
 eng/restore/harvestPackages.targets           |   1 +
 .../Directory.Build.props                     |   3 +
 .../ref/System.Text.Encodings.Web.csproj      |   1 +
 .../src/System.Text.Encodings.Web.csproj      |   6 +
 .../src/System/IO/TextWriterExtensions.cs     |  43 ++
 .../System/Text/Encodings/Web/TextEncoder.cs  | 481 ++++++++----------
 .../src/System/Text/Unicode/UnicodeHelpers.cs | 167 ------
 .../tests/AllowedCharsBitmapTests.cs          |   2 +-
 .../tests/ConfigurableScalarTextEncoder.cs    |  54 +-
 .../tests/HtmlEncoderTests.cs                 |   2 +-
 .../JavaScriptStringEncoderTests.Relaxed.cs   |   2 +-
 .../tests/JavaScriptStringEncoderTests.cs     |   2 +-
 .../tests/ScalarTestEncoder.cs                |  20 +-
 .../System.Text.Encodings.Web.Tests.csproj    |   3 +
 .../tests/TextEncoderBatteryTests.cs          | 241 +++++++++
 .../tests/TextEncoderTests.cs                 |  84 ++-
 .../tests/UnicodeEncoderBase.cs               |  17 +-
 .../tests/UnicodeEncoderBaseTests.cs          |   2 +-
 .../tests/UnicodeHelpersTests.cs              |  64 +--
 .../tests/UnicodeTestHelpers.cs               |  22 +
 .../tests/UrlEncoderTests.cs                  |   2 +-
 src/libraries/libraries-packages.proj         |   1 +
 src/libraries/pkg/baseline/packageIndex.json  |  17 +-
 24 files changed, 710 insertions(+), 529 deletions(-)
 create mode 100644 src/libraries/System.Text.Encodings.Web/src/System/IO/TextWriterExtensions.cs
 create mode 100644 src/libraries/System.Text.Encodings.Web/tests/TextEncoderBatteryTests.cs
 create mode 100644 src/libraries/System.Text.Encodings.Web/tests/UnicodeTestHelpers.cs
diff --git a/NuGet.config b/NuGet.config
index 272a9bec4fd23..34300a9188a76 100644
--- a/NuGet.config
+++ b/NuGet.config
@@ -16,6 +16,8 @@
     <add key="dotnet-eng" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json" />
     <add key="dotnet5" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" />
     <add key="dotnet5-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5-transport/nuget/v3/index.json" />
+    <!-- Harvesting feed from 2.1 -->
+    <add key="darc-int-corefx-2.1.26" value="https://pkgs.dev.azure.com/dnceng/internal/_packaging/darc-int-corefx-2.1.26/nuget/v3/index.json" />
   </packageSources>
   <disabledPackageSources>
     <clear />
diff --git a/eng/restore/harvestPackages.targets b/eng/restore/harvestPackages.targets
index 2e9a8155be678..0d238bddc87ba 100644
--- a/eng/restore/harvestPackages.targets
+++ b/eng/restore/harvestPackages.targets
@@ -23,6 +23,7 @@
 
     <!-- Allow to override package download and versions in case there is already a PackageDownload set -->
     <ItemGroup>
+      <PackageDownload Include="System.Text.Encodings.Web" Version="4.5.1" />
       <_OverridenPackageDownloads Include="@(_PackageDownload)" Condition="'@(PackageDownload)' == '@(_PackageDownload)' and %(Identity) != ''" />
       <_PackageDownload Remove="@(_OverridenPackageDownloads)" />
       <_PackageDownload Include="@(PackageDownload)" />
diff --git a/src/libraries/System.Text.Encodings.Web/Directory.Build.props b/src/libraries/System.Text.Encodings.Web/Directory.Build.props
index bdcfca3b543cb..10888235eab33 100644
--- a/src/libraries/System.Text.Encodings.Web/Directory.Build.props
+++ b/src/libraries/System.Text.Encodings.Web/Directory.Build.props
@@ -1,6 +1,9 @@
 ﻿<Project>
   <Import Project="..\Directory.Build.props" />
   <PropertyGroup>
+    <AssemblyVersion>5.0.0.1</AssemblyVersion>
+    <PackageVersion>5.0.1</PackageVersion>
+    <HarvestVersion>4.5.1</HarvestVersion>
     <StrongNameKeyId>Open</StrongNameKeyId>
   </PropertyGroup>
 </Project>
\ No newline at end of file
diff --git a/src/libraries/System.Text.Encodings.Web/ref/System.Text.Encodings.Web.csproj b/src/libraries/System.Text.Encodings.Web/ref/System.Text.Encodings.Web.csproj
index eff1dfbb548ca..24b4be39e3164 100644
--- a/src/libraries/System.Text.Encodings.Web/ref/System.Text.Encodings.Web.csproj
+++ b/src/libraries/System.Text.Encodings.Web/ref/System.Text.Encodings.Web.csproj
@@ -17,6 +17,7 @@
   </ItemGroup>
   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0' or
                          $(TargetFramework.StartsWith('net4'))">
+    <PackageReference Include="System.Buffers" Version="$(SystemBuffersVersion)" />
     <PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj b/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
index 906f6d9dc252a..dd490865d205c 100644
--- a/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
+++ b/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
@@ -10,6 +10,7 @@
     <NoWarn>$(NoWarn);CS3019</NoWarn>
   </PropertyGroup>
   <ItemGroup>
+    <Compile Include="System\IO\TextWriterExtensions.cs" />
     <Compile Include="System\Text\Encodings\Web\DefaultJavaScriptEncoder.cs" />
     <Compile Include="System\Text\Encodings\Web\DefaultJavaScriptEncoderBasicLatin.cs" />
     <Compile Include="System\Text\Encodings\Web\HtmlEncoder.cs" />
@@ -40,6 +41,7 @@
     <Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs" Link="System\Text\UnicodeDebug.cs" />
     <Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" />
     <Compile Include="$(CommonPath)System\HexConverter.cs" Link="Common\System\HexConverter.cs" />
+    <Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs" Link="Common\System\Text\ValueStringBuilder.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppCurrent)' or
                         '$(TargetFramework)' == 'netcoreapp3.0'">
@@ -51,8 +53,12 @@
     <Reference Include="System.Runtime.Intrinsics" />
     <Reference Include="System.Threading" />
   </ItemGroup>
+  <ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp3.0'">
+    <Reference Include="System.Buffers" />
+  </ItemGroup>
   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0' or
                         $(TargetFramework.StartsWith('net4'))">
+    <PackageReference Include="System.Buffers" Version="$(SystemBuffersVersion)" />
     <PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
   </ItemGroup>
 </Project>
diff --git a/src/libraries/System.Text.Encodings.Web/src/System/IO/TextWriterExtensions.cs b/src/libraries/System.Text.Encodings.Web/src/System/IO/TextWriterExtensions.cs
new file mode 100644
index 0000000000000..c2ace13699a48
--- /dev/null
+++ b/src/libraries/System.Text.Encodings.Web/src/System/IO/TextWriterExtensions.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+
+#if !(NETCOREAPP || NETSTANDARD2_1)
+using System.Buffers;
+#endif
+
+namespace System.IO
+{
+    internal static class TextWriterExtensions
+    {
+        /// <summary>
+        /// Writes the <paramref name="count"/> chars of <paramref name="value"/> that start at <paramref name="offset"/> to <paramref name="writer"/>.
+        /// </summary>
+        public static void WritePartialString(this TextWriter writer, string value, int offset, int count)
+        {
+            Debug.Assert(writer != null); // debug-only null check
+            Debug.Assert(value != null); // debug-only null check
+
+            if (offset == 0 && count == value.Length)
+            {
+                // whole string requested: on all platforms, prefer TextWriter.Write(string) since no slicing is required
+                writer.Write(value);
+            }
+            else
+            {
+                // slicing is required: call TextWriter.Write(ReadOnlySpan<char>) where the target framework has it;
+                // otherwise copy the slice into a rented array and use the Write(char[], int, int) overload
+                ReadOnlySpan<char> sliced = value.AsSpan(offset, count);
+#if NETCOREAPP || NETSTANDARD2_1
+                writer.Write(sliced);
+#else
+                char[] rented = ArrayPool<char>.Shared.Rent(sliced.Length); // pool may hand back a longer array, hence the explicit count below
+                sliced.CopyTo(rented);
+                writer.Write(rented, 0, sliced.Length);
+                ArrayPool<char>.Shared.Return(rented); // NOTE(review): skipped if writer.Write throws (no try/finally)
+#endif
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs b/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
index d8c228e79202f..91902c84117a9 100644
--- a/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
+++ b/src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
@@ -26,6 +26,8 @@ namespace System.Text.Encodings.Web
     /// </remarks>
     public abstract class TextEncoder
     {
+        private const int EncodeStartingOutputBufferSize = 1024; // bytes or chars, depending
+
         // Fast cache for Ascii
         private readonly byte[][] _asciiEscape = new byte[0x80][];
 
@@ -107,154 +109,47 @@ public virtual string Encode(string value)
                 throw new ArgumentNullException(nameof(value));
             }
 
-            unsafe
-            {
-                fixed (char* valuePointer = value)
-                {
-                    int firstCharacterToEncode = FindFirstCharacterToEncode(valuePointer, value.Length);
-
-                    if (firstCharacterToEncode == -1)
-                    {
-                        return value;
-                    }
-
-                    int bufferSize = MaxOutputCharactersPerInputCharacter * value.Length;
-
-                    string result;
-                    if (bufferSize < 1024)
-                    {
-                        char* wholebuffer = stackalloc char[bufferSize];
-                        OperationStatus status = EncodeIntoBuffer(wholebuffer, bufferSize, valuePointer, value.Length, out int _, out int totalWritten, firstCharacterToEncode);
-                        if (status != OperationStatus.Done)
-                        {
-                            ThrowArgumentException_MaxOutputCharsPerInputChar();
-                        }
-
-                        result = new string(wholebuffer, 0, totalWritten);
-                    }
-                    else
-                    {
-                        char[] wholebuffer = new char[bufferSize];
-                        fixed (char* buffer = &wholebuffer[0])
-                        {
-                            OperationStatus status = EncodeIntoBuffer(buffer, bufferSize, valuePointer, value.Length, out int _, out int totalWritten, firstCharacterToEncode);
-                            if (status != OperationStatus.Done)
-                            {
-                                ThrowArgumentException_MaxOutputCharsPerInputChar();
-                            }
-
-                            result = new string(wholebuffer, 0, totalWritten);
-                        }
-                    }
-
-                    return result;
-                }
-            }
-        }
-
-        private unsafe OperationStatus EncodeIntoBuffer(
-            char* buffer,
-            int bufferLength,
-            char* value,
-            int valueLength,
-            out int charsConsumed,
-            out int charsWritten,
-            int firstCharacterToEncode,
-            bool isFinalBlock = true)
-        {
-            Debug.Assert(value != null);
-            Debug.Assert(firstCharacterToEncode >= 0);
-
-            char* originalBuffer = buffer;
-            charsWritten = 0;
-
-            if (firstCharacterToEncode > 0)
+            int indexOfFirstCharToEncode = FindFirstCharacterToEncode(value.AsSpan());
+            if (indexOfFirstCharToEncode < 0)
             {
-                Debug.Assert(firstCharacterToEncode <= valueLength);
-                Buffer.MemoryCopy(source: value,
-                    destination: buffer,
-                    destinationSizeInBytes: sizeof(char) * bufferLength,
-                    sourceBytesToCopy: sizeof(char) * firstCharacterToEncode);
-
-                charsWritten += firstCharacterToEncode;
-                bufferLength -= firstCharacterToEncode;
-                buffer += firstCharacterToEncode;
+                return value; // shortcut: there's no work to perform
             }
 
-            int valueIndex = firstCharacterToEncode;
-
-            char firstChar = value[valueIndex];
-            char secondChar = firstChar;
-            bool wasSurrogatePair = false;
-
-            // this loop processes character pairs (in case they are surrogates).
-            // there is an if block below to process single last character.
-            int secondCharIndex;
-            for (secondCharIndex = valueIndex + 1; secondCharIndex < valueLength; secondCharIndex++)
-            {
-                if (!wasSurrogatePair)
-                {
-                    firstChar = secondChar;
-                }
-                else
-                {
-                    firstChar = value[secondCharIndex - 1];
-                }
-
-                secondChar = value[secondCharIndex];
-
-                if (!WillEncode(firstChar))
-                {
-                    wasSurrogatePair = false;
-                    *buffer = firstChar;
-                    buffer++;
-                    bufferLength--;
-                    charsWritten++;
-                }
-                else
-                {
-                    int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(firstChar, secondChar, out wasSurrogatePair, out bool _);
-                    if (!TryEncodeUnicodeScalar(nextScalar, buffer, bufferLength, out int charsWrittenThisTime))
-                    {
-                        charsConsumed = (int)(originalBuffer - buffer);
-                        return OperationStatus.DestinationTooSmall;
-                    }
+            ReadOnlySpan<char> remainingInput = value.AsSpan(indexOfFirstCharToEncode);
+            ValueStringBuilder stringBuilder = new ValueStringBuilder(stackalloc char[EncodeStartingOutputBufferSize]);
 
-                    if (wasSurrogatePair)
-                    {
-                        secondCharIndex++;
-                    }
+#if !NETCOREAPP
+            // Can't call string.Concat later in the method, so memcpy now.
+            stringBuilder.Append(value.AsSpan(0, indexOfFirstCharToEncode));
+#endif
 
-                    buffer += charsWrittenThisTime;
-                    bufferLength -= charsWrittenThisTime;
-                    charsWritten += charsWrittenThisTime;
-                }
-            }
+            // On each iteration of the main loop, we'll make sure we have at least this many chars left in the
+            // destination buffer. This should prevent us from making very chatty calls where we only make progress
+            // one char at a time.
+            int minBufferBumpEachIteration = Math.Max(MaxOutputCharactersPerInputCharacter, EncodeStartingOutputBufferSize);
 
-            if (secondCharIndex == valueLength)
+            do
             {
-                firstChar = value[valueLength - 1];
-                int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(firstChar, null, out wasSurrogatePair, out bool needMoreData);
-                if (!isFinalBlock && needMoreData)
-                {
-                    Debug.Assert(wasSurrogatePair == false);
-                    charsConsumed = (int)(buffer - originalBuffer);
-                    return OperationStatus.NeedMoreData;
-                }
-
-                if (!TryEncodeUnicodeScalar(nextScalar, buffer, bufferLength, out int charsWrittenThisTime))
+                // AppendSpan mutates the VSB length to include the newly-added span. This potentially overallocates.
+                Span<char> destBuffer = stringBuilder.AppendSpan(Math.Max(remainingInput.Length, minBufferBumpEachIteration));
+                Encode(remainingInput, destBuffer, out int charsConsumedJustNow, out int charsWrittenJustNow);
+                if (charsWrittenJustNow == 0 || (uint)charsWrittenJustNow > (uint)destBuffer.Length)
                 {
-                    charsConsumed = (int)(buffer - originalBuffer);
-                    return OperationStatus.DestinationTooSmall;
+                    ThrowArgumentException_MaxOutputCharsPerInputChar(); // couldn't make forward progress or returned bogus data
                 }
+                remainingInput = remainingInput.Slice(charsConsumedJustNow);
+                // It's likely we didn't populate the entire span. If this is the case, adjust the VSB length
+                // to reflect that there's unused buffer at the end of the VSB instance.
+                stringBuilder.Length -= destBuffer.Length - charsWrittenJustNow;
+            } while (!remainingInput.IsEmpty);
 
-                buffer += charsWrittenThisTime;
-                bufferLength -= charsWrittenThisTime;
-                charsWritten += charsWrittenThisTime;
-            }
-
-            charsConsumed = valueLength;
-            return OperationStatus.Done;
+#if NETCOREAPP
+            string retVal = string.Concat(value.AsSpan(0, indexOfFirstCharToEncode), stringBuilder.AsSpan());
+            stringBuilder.Dispose();
+            return retVal;
+#else
+            return stringBuilder.ToString();
+#endif
         }
 
         /// <summary>
@@ -286,37 +181,18 @@ public virtual void Encode(TextWriter output, string value, int startIndex, int
             }
             ValidateRanges(startIndex, characterCount, actualInputLength: value.Length);
 
-            unsafe
+            int indexOfFirstCharToEncode = FindFirstCharacterToEncode(value.AsSpan(startIndex, characterCount));
+            if (indexOfFirstCharToEncode < 0)
             {
-                fixed (char* valuePointer = value)
-                {
-                    char* substring = valuePointer + startIndex;
-                    int firstIndexToEncode = FindFirstCharacterToEncode(substring, characterCount);
-
-                    if (firstIndexToEncode == -1) // nothing to encode;
-                    {
-                        if (startIndex == 0 && characterCount == value.Length) // write whole string
-                        {
-                            output.Write(value);
-                            return;
-                        }
-                        for (int i = 0; i < characterCount; i++) // write substring
-                        {
-                            output.Write(*substring);
-                            substring++;
-                        }
-                        return;
-                    }
+                indexOfFirstCharToEncode = characterCount;
+            }
 
-                    // write prefix, then encode
-                    for (int i = 0; i < firstIndexToEncode; i++)
-                    {
-                        output.Write(*substring);
-                        substring++;
-                    }
+            // memcpy all characters that don't require encoding, then encode any remaining chars
 
-                    EncodeCore(output, substring, characterCount - firstIndexToEncode);
-                }
+            output.WritePartialString(value, startIndex, indexOfFirstCharToEncode);
+            if (indexOfFirstCharToEncode != characterCount)
+            {
+                Encode(output, value.AsSpan(startIndex + indexOfFirstCharToEncode, characterCount - indexOfFirstCharToEncode));
             }
         }
 
@@ -339,37 +215,16 @@ public virtual void Encode(TextWriter output, char[] value, int startIndex, int
             }
             ValidateRanges(startIndex, characterCount, actualInputLength: value.Length);
 
-            unsafe
+            int indexOfFirstCharToEncode = FindFirstCharacterToEncode(value.AsSpan(startIndex, characterCount));
+            if (indexOfFirstCharToEncode < 0)
             {
-                fixed (char* valuePointer = value)
-                {
-                    char* substring = valuePointer + startIndex;
-                    int firstIndexToEncode = FindFirstCharacterToEncode(substring, characterCount);
-
-                    if (firstIndexToEncode == -1) // nothing to encode;
-                    {
-                        if (startIndex == 0 && characterCount == value.Length) // write whole string
-                        {
-                            output.Write(value);
-                            return;
-                        }
-                        for (int i = 0; i < characterCount; i++) // write substring
-                        {
-                            output.Write(*substring);
-                            substring++;
-                        }
-                        return;
-                    }
-
-                    // write prefix, then encode
-                    for (int i = 0; i < firstIndexToEncode; i++)
-                    {
-                        output.Write(*substring);
-                        substring++;
-                    }
+                indexOfFirstCharToEncode = characterCount;
+            }
+            output.Write(value, startIndex, indexOfFirstCharToEncode);
 
-                    EncodeCore(output, substring, characterCount - firstIndexToEncode);
-                }
+            if (indexOfFirstCharToEncode != characterCount)
+            {
+                Encode(output, value.AsSpan(startIndex + indexOfFirstCharToEncode, characterCount - indexOfFirstCharToEncode));
             }
         }
 
@@ -584,99 +439,185 @@ public virtual OperationStatus Encode(
             out int charsWritten,
             bool isFinalBlock = true)
         {
-            unsafe
+            if (source.IsEmpty)
             {
-                fixed (char* sourcePtr = source)
-                {
-                    int firstCharacterToEncode;
-                    if (source.IsEmpty || (firstCharacterToEncode = FindFirstCharacterToEncode(sourcePtr, source.Length)) == -1)
-                    {
-                        if (source.TryCopyTo(destination))
-                        {
-                            charsConsumed = source.Length;
-                            charsWritten = source.Length;
-                            return OperationStatus.Done;
-                        }
+                // There's nothing to do.
+                charsConsumed = 0;
+                charsWritten = 0;
+                return OperationStatus.Done;
+            }
 
-                        charsConsumed = 0;
-                        charsWritten = 0;
-                        return OperationStatus.DestinationTooSmall;
-                    }
-                    else if (destination.IsEmpty)
-                    {
-                        // Guards against passing a null destinationPtr to EncodeIntoBuffer (pinning an empty Span will return a null pointer).
-                        charsConsumed = 0;
-                        charsWritten = 0;
-                        return OperationStatus.DestinationTooSmall;
-                    }
+            // The Encode method is intended to be called in a loop, potentially where the source buffer
+            // is much larger than the destination buffer. We don't want to walk the entire source buffer
+            // on each invocation of this method, so we'll slice the source buffer to be no larger than
+            // the destination buffer to avoid performing unnecessary work. The potential exists for us to
+            // split the source in the middle of a UTF-16 surrogate pair. If this happens,
+            // FindFirstCharacterToEncode will report the split surrogate as "needs encoding", we'll fall
+            // back down the slow path, and the slow path will handle the surrogate appropriately.
 
-                    fixed (char* destinationPtr = destination)
-                    {
-                        return EncodeIntoBuffer(destinationPtr, destination.Length, sourcePtr, source.Length, out charsConsumed, out charsWritten, firstCharacterToEncode, isFinalBlock);
-                    }
-                }
+            ReadOnlySpan<char> sourceSearchSpace = source;
+            if (destination.Length < source.Length)
+            {
+                sourceSearchSpace = source.Slice(0, destination.Length);
             }
-        }
 
-        private unsafe void EncodeCore(TextWriter output, char* value, int valueLength)
-        {
-            Debug.Assert(value != null && output != null);
-            Debug.Assert(valueLength >= 0);
+            int idxOfFirstCharToEncode = FindFirstCharacterToEncode(sourceSearchSpace);
+            if (idxOfFirstCharToEncode < 0)
+            {
+                idxOfFirstCharToEncode = sourceSearchSpace.Length;
+            }
+
+            source.Slice(0, idxOfFirstCharToEncode).CopyTo(destination); // memcpy data that doesn't need to be encoded
+            if (idxOfFirstCharToEncode == source.Length)
+            {
+                charsConsumed = source.Length;
+                charsWritten = source.Length;
+                return OperationStatus.Done; // memcopied all chars, nothing more to do
+            }
 
-            int bufferLength = MaxOutputCharactersPerInputCharacter;
-            char* buffer = stackalloc char[bufferLength];
+            // If we got to this point, we couldn't memcpy the entire source buffer into the destination.
+            // Either the destination was too short or we found data that needs to be encoded.
 
-            char firstChar = *value;
-            char secondChar = firstChar;
-            bool wasSurrogatePair = false;
-            int charsWritten;
+            OperationStatus opStatus = EncodeCore(source.Slice(idxOfFirstCharToEncode), destination.Slice(idxOfFirstCharToEncode), out int remainingCharsConsumed, out int remainingCharsWritten, isFinalBlock);
+            charsConsumed = idxOfFirstCharToEncode + remainingCharsConsumed;
+            charsWritten = idxOfFirstCharToEncode + remainingCharsWritten;
+            return opStatus;
 
-            // this loop processes character pairs (in case they are surrogates).
-            // there is an if block below to process single last character.
-            int secondCharIndex;
-            for (secondCharIndex = 1; secondCharIndex < valueLength; secondCharIndex++)
+            OperationStatus EncodeCore(ReadOnlySpan<char> source, Span<char> destination, out int charsConsumed, out int charsWritten, bool isFinalBlock)
             {
-                if (!wasSurrogatePair)
-                {
-                    firstChar = secondChar;
-                }
-                else
-                {
-                    firstChar = value[secondCharIndex - 1];
-                }
-                secondChar = value[secondCharIndex];
+                Debug.Assert(!source.IsEmpty, "Caller should've handled fully-consumed source in fast path.");
 
-                if (!WillEncode(firstChar))
+                if (destination.IsEmpty)
                 {
-                    wasSurrogatePair = false;
-                    output.Write(firstChar);
+                    destination = Array.Empty<char>(); // normalize empty destination buffers to non-nullptr reference; TryEncodeUnicodeScalar requires this
                 }
-                else
+
+                int destinationOffset = 0;
+                int sourceOffset = 0;
+                while ((uint)sourceOffset < (uint)source.Length)
                 {
-                    int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(firstChar, secondChar, out wasSurrogatePair, out bool _);
-                    if (!TryEncodeUnicodeScalar(nextScalar, buffer, bufferLength, out charsWritten))
+                    int scalarValue = source[sourceOffset];
+
+                    if (!UnicodeUtility.IsSurrogateCodePoint((uint)scalarValue))
+                    {
+                        if (!WillEncode(scalarValue))
+                        {
+                            // single input char -> single output char (no escaping needed)
+                            if ((uint)destinationOffset >= (uint)destination.Length)
+                            {
+                                goto DestinationTooSmall;
+                            }
+
+                            destination[destinationOffset++] = (char)scalarValue;
+                            sourceOffset++;
+                            continue;
+                        }
+                    }
+                    else
                     {
-                        ThrowArgumentException_MaxOutputCharsPerInputChar();
+                        uint firstCodePoint = (uint)scalarValue;
+                        scalarValue = '\uFFFD'; // replacement char, just in case we can't read a full surrogate pair
+                        if (UnicodeUtility.IsHighSurrogateCodePoint(firstCodePoint))
+                        {
+                            int nextSourceIdx = sourceOffset + 1;
+                            if ((uint)nextSourceIdx >= (uint)source.Length)
+                            {
+                                if (!isFinalBlock)
+                                {
+                                    goto NeedMoreData;
+                                }
+                            }
+                            else
+                            {
+                                uint nextCodePoint = source[nextSourceIdx];
+                                if (UnicodeUtility.IsLowSurrogateCodePoint(nextCodePoint))
+                                {
+                                    scalarValue = (int)UnicodeUtility.GetScalarFromUtf16SurrogatePair(firstCodePoint, nextCodePoint);
+                                    if (!WillEncode(scalarValue))
+                                    {
+                                        // 2 input chars -> 2 output chars (no escaping needed)
+                                        if ((uint)(destinationOffset + 1) >= (uint)destination.Length)
+                                        {
+                                            goto DestinationTooSmall;
+                                        }
+
+                                        destination[destinationOffset] = (char)firstCodePoint;
+                                        destination[destinationOffset + 1] = (char)nextCodePoint;
+                                        destinationOffset += 2;
+                                        sourceOffset += 2;
+                                        continue;
+                                    }
+                                }
+                            }
+                        }
                     }
-                    Write(output, buffer, charsWritten);
 
-                    if (wasSurrogatePair)
+                    // If we got to this point, we need to encode.
+
+                    int numCharsWrittenJustNow;
+                    unsafe
                     {
-                        secondCharIndex++;
+                        fixed (char* pDest = &MemoryMarshal.GetReference(destination))
+                        {
+                            Debug.Assert(pDest != null); // should've been handled on method entry
+                            Debug.Assert((uint)destinationOffset <= (uint)destination.Length);
+
+                            if (!TryEncodeUnicodeScalar(scalarValue, pDest + destinationOffset, destination.Length - destinationOffset, out numCharsWrittenJustNow))
+                            {
+                                goto DestinationTooSmall;
+                            }
+                        }
                     }
+
+                    Debug.Assert(numCharsWrittenJustNow <= destination.Length - destinationOffset, "TryEncodeUnicodeScalar wrote past end of buffer?");
+                    sourceOffset += UnicodeUtility.GetUtf16SequenceLength((uint)scalarValue);
+                    destinationOffset += numCharsWrittenJustNow;
                 }
+
+                OperationStatus retVal = OperationStatus.Done;
+
+            ReturnCommon:
+                Debug.Assert(sourceOffset <= source.Length);
+                Debug.Assert(destinationOffset <= destination.Length);
+                charsConsumed = sourceOffset;
+                charsWritten = destinationOffset;
+                return retVal;
+
+            NeedMoreData:
+                retVal = OperationStatus.NeedMoreData;
+                goto ReturnCommon;
+
+            DestinationTooSmall:
+                retVal = OperationStatus.DestinationTooSmall;
+                goto ReturnCommon;
             }
+        }
+
+        private void Encode(TextWriter output, ReadOnlySpan<char> value)
+        {
+            Debug.Assert(output != null);
+            Debug.Assert(!value.IsEmpty, "Caller should've special-cased 'no encoding needed'.");
 
-            if (!wasSurrogatePair || (secondCharIndex == valueLength))
+            // Floor for the scratch buffer size: at least EncodeStartingOutputBufferSize chars, or more if the encoder's
+            // MaxOutputCharactersPerInputCharacter is larger. This should prevent very chatty calls where we only make
+            // progress one char at a time.
+            int minBufferBumpEachIteration = Math.Max(MaxOutputCharactersPerInputCharacter, EncodeStartingOutputBufferSize);
+            char[] rentedArray = ArrayPool<char>.Shared.Rent(Math.Max(value.Length, minBufferBumpEachIteration)); // rented once, reused by every pass below
+            Span<char> scratchBuffer = rentedArray; // span view over the rented array, used as the Encode destination
+
+            do
             {
-                firstChar = value[valueLength - 1];
-                int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(firstChar, null, out wasSurrogatePair, out bool _);
-                if (!TryEncodeUnicodeScalar(nextScalar, buffer, bufferLength, out charsWritten))
+                Encode(value, scratchBuffer, out int charsConsumedJustNow, out int charsWrittenJustNow);
+                if (charsWrittenJustNow == 0 || (uint)charsWrittenJustNow > (uint)scratchBuffer.Length)
                 {
-                    ThrowArgumentException_MaxOutputCharsPerInputChar();
+                    ThrowArgumentException_MaxOutputCharsPerInputChar(); // couldn't make forward progress or returned bogus data
                 }
-                Write(output, buffer, charsWritten);
-            }
+
+                output.Write(rentedArray, 0, charsWrittenJustNow); // write char[], not Span<char>, for best compat & performance
+                value = value.Slice(charsConsumedJustNow); // drop the input chars consumed by this pass
+            } while (!value.IsEmpty);
+
+            ArrayPool<char>.Shared.Return(rentedArray); // NOTE(review): not reached if an exception escapes the loop (no try/finally)
         }
 
         private unsafe int FindFirstCharacterToEncode(ReadOnlySpan<char> text)
@@ -945,17 +886,6 @@ private static void ValidateRanges(int startIndex, int characterCount, int actua
             }
         }
 
-        private static unsafe void Write(TextWriter output, char* input, int inputLength)
-        {
-            Debug.Assert(output != null && input != null && inputLength >= 0);
-
-            while (inputLength-- > 0)
-            {
-                output.Write(*input);
-                input++;
-            }
-        }
-
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private byte[]? GetAsciiEncoding(byte value)
         {
@@ -995,6 +925,7 @@ private unsafe void InitializeAsciiCache()
                 }
 
                 _bitMaskLookupAsciiNeedsEscaping = vector;
+                _isAsciiCacheInitialized = true;
                 return;
             }
 #endif
diff --git a/src/libraries/System.Text.Encodings.Web/src/System/Text/Unicode/UnicodeHelpers.cs b/src/libraries/System.Text.Encodings.Web/src/System/Text/Unicode/UnicodeHelpers.cs
index add7a295b6094..544a941017629 100644
--- a/src/libraries/System.Text.Encodings.Web/src/System/Text/Unicode/UnicodeHelpers.cs
+++ b/src/libraries/System.Text.Encodings.Web/src/System/Text/Unicode/UnicodeHelpers.cs
@@ -15,11 +15,6 @@ namespace System.Text.Unicode
     /// </summary>
     internal static unsafe partial class UnicodeHelpers
     {
-        /// <summary>
-        /// Used for invalid Unicode sequences or other unrepresentable values.
-        /// </summary>
-        private const char UNICODE_REPLACEMENT_CHAR = '\uFFFD';
-
         /// <summary>
         /// The last code point defined by the Unicode specification.
         /// </summary>
@@ -239,134 +234,6 @@ internal static ReadOnlySpan<uint> GetDefinedCharacterBitmap()
             }
         }
 
-        /// <summary>
-        /// Given a UTF-16 character stream, reads the next scalar value from the stream.
-        /// Set 'endOfString' to true if 'pChar' points to the last character in the stream.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static int GetScalarValueFromUtf16(char first, char? second, out bool wasSurrogatePair, out bool needsMoreData)
-        {
-            if (!char.IsSurrogate(first))
-            {
-                wasSurrogatePair = false;
-                needsMoreData = false;
-                return first;
-            }
-
-            return GetScalarValueFromUtf16Slow(first, second, out wasSurrogatePair, out needsMoreData);
-        }
-
-        private static int GetScalarValueFromUtf16Slow(char first, char? second, out bool wasSurrogatePair, out bool needMoreData)
-        {
-#if DEBUG
-            if (!char.IsSurrogate(first))
-            {
-                Debug.Assert(false, "This case should've been handled by the fast path.");
-                wasSurrogatePair = false;
-                needMoreData = false;
-                return first;
-            }
-#endif
-            if (char.IsHighSurrogate(first))
-            {
-                if (second != null)
-                {
-                    if (char.IsLowSurrogate(second.Value))
-                    {
-                        // valid surrogate pair - extract codepoint
-                        wasSurrogatePair = true;
-                        needMoreData = false;
-                        return GetScalarValueFromUtf16SurrogatePair(first, second.Value);
-                    }
-                    else
-                    {
-                        // unmatched surrogate - substitute
-                        wasSurrogatePair = false;
-                        needMoreData = false;
-                        return UNICODE_REPLACEMENT_CHAR;
-                    }
-                }
-                else
-                {
-                    // unmatched surrogate - substitute
-                    wasSurrogatePair = false;
-                    needMoreData = true; // Last character was high surrogate; we need more data.
-                    return UNICODE_REPLACEMENT_CHAR;
-                }
-            }
-            else
-            {
-                // unmatched surrogate - substitute
-                Debug.Assert(char.IsLowSurrogate(first));
-                wasSurrogatePair = false;
-                needMoreData = false;
-                return UNICODE_REPLACEMENT_CHAR;
-            }
-        }
-
-        /// <summary>
-        /// Given a UTF-16 character stream, reads the next scalar value from the stream.
-        /// Set 'endOfString' to true if 'pChar' points to the last character in the stream.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static int GetScalarValueFromUtf16(char* pChar, bool endOfString)
-        {
-            // This method is marked as AggressiveInlining to handle the common case of a non-surrogate
-            // character. The surrogate case is handled in the slower fallback code path.
-            char thisChar = *pChar;
-            return (char.IsSurrogate(thisChar)) ? GetScalarValueFromUtf16Slow(pChar, endOfString) : thisChar;
-        }
-
-        private static int GetScalarValueFromUtf16Slow(char* pChar, bool endOfString)
-        {
-            char firstChar = pChar[0];
-
-            if (!char.IsSurrogate(firstChar))
-            {
-                Debug.Assert(false, "This case should've been handled by the fast path.");
-                return firstChar;
-            }
-            else if (char.IsHighSurrogate(firstChar))
-            {
-                if (endOfString)
-                {
-                    // unmatched surrogate - substitute
-                    return UNICODE_REPLACEMENT_CHAR;
-                }
-                else
-                {
-                    char secondChar = pChar[1];
-                    if (char.IsLowSurrogate(secondChar))
-                    {
-                        // valid surrogate pair - extract codepoint
-                        return GetScalarValueFromUtf16SurrogatePair(firstChar, secondChar);
-                    }
-                    else
-                    {
-                        // unmatched surrogate - substitute
-                        return UNICODE_REPLACEMENT_CHAR;
-                    }
-                }
-            }
-            else
-            {
-                // unmatched surrogate - substitute
-                Debug.Assert(char.IsLowSurrogate(firstChar));
-                return UNICODE_REPLACEMENT_CHAR;
-            }
-        }
-
-        private static int GetScalarValueFromUtf16SurrogatePair(char highSurrogate, char lowSurrogate)
-        {
-            Debug.Assert(char.IsHighSurrogate(highSurrogate));
-            Debug.Assert(char.IsLowSurrogate(lowSurrogate));
-
-            // See https://www.unicode.org/versions/Unicode6.2.0/ch03.pdf, Table 3.5 for the
-            // details of this conversion. We don't use Char.ConvertToUtf32 because its exception
-            // handling shows up on the hot path, and our caller has already sanitized the inputs.
-            return (lowSurrogate & 0x3ff) | (((highSurrogate & 0x3ff) + (1 << 6)) << 10);
-        }
-
         internal static void GetUtf16SurrogatePairFromAstralScalarValue(int scalar, out char highSurrogate, out char lowSurrogate)
         {
             Debug.Assert(0x10000 <= scalar && scalar <= UNICODE_LAST_CODEPOINT);
@@ -426,21 +293,6 @@ internal static int GetUtf8RepresentationForScalarValue(uint scalar)
             }
         }
 
-        /// <summary>
-        /// Returns a value stating whether a character is defined per the checked-in version
-        /// of the Unicode specification. Certain classes of characters (control chars,
-        /// private use, surrogates, some whitespace) are considered "undefined" for
-        /// our purposes.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static bool IsCharacterDefined(char c)
-        {
-            uint codePoint = (uint)c;
-            int index = (int)(codePoint >> 5);
-            int offset = (int)(codePoint & 0x1FU);
-            return ((GetDefinedCharacterBitmap()[index] >> offset) & 0x1U) != 0;
-        }
-
         /// <summary>
         /// Determines whether the given scalar value is in the supplementary plane and thus
         /// requires 2 characters to be represented in UTF-16 (as a surrogate pair).
@@ -450,24 +302,5 @@ internal static bool IsSupplementaryCodePoint(int scalar)
         {
             return ((scalar & ~((int)char.MaxValue)) != 0);
         }
-
-        /// <summary>
-        /// Returns <see langword="true"/> iff <paramref name="value"/> is a UTF-8 continuation byte;
-        /// i.e., has binary representation 10xxxxxx, where x is any bit.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static bool IsUtf8ContinuationByte(in byte value)
-        {
-            // This API takes its input as a readonly ref so that the JIT can emit "cmp ModRM" statements
-            // directly rather than bounce a temporary through a register. That is, we want the JIT to be
-            // able to emit a single "cmp byte ptr [data], C0h" statement if we're querying a memory location
-            // to see if it's a continuation byte. Data that's already enregistered will go through the
-            // normal "cmp reg, C0h" code paths, perhaps with some extra unnecessary "movzx" instructions.
-            //
-            // The below check takes advantage of the two's complement representation of negative numbers.
-            // [ 0b1000_0000, 0b1011_1111 ] is [ -127 (sbyte.MinValue), -65 ]
-
-            return ((sbyte)value < -64);
-        }
     }
 }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/AllowedCharsBitmapTests.cs b/src/libraries/System.Text.Encodings.Web/tests/AllowedCharsBitmapTests.cs
index 8e8b426c87c61..2dbf5b2f4c415 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/AllowedCharsBitmapTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/AllowedCharsBitmapTests.cs
@@ -119,7 +119,7 @@ public void ForbidUndefinedCharacters_RemovesUndefinedChars()
                 }
                 else
                 {
-                    Assert.Equal(UnicodeHelpers.IsCharacterDefined((char)i), bitmap.IsCharacterAllowed((char)i));
+                    Assert.Equal(UnicodeTestHelpers.IsCharacterDefined((char)i), bitmap.IsCharacterAllowed((char)i));
                 }
             }
         }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/ConfigurableScalarTextEncoder.cs b/src/libraries/System.Text.Encodings.Web/tests/ConfigurableScalarTextEncoder.cs
index e592f25c1441e..0f948fcff2405 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/ConfigurableScalarTextEncoder.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/ConfigurableScalarTextEncoder.cs
@@ -17,9 +17,59 @@ public ConfigurableScalarTextEncoder(Predicate<int> isScalarAllowed)
             _isScalarAllowed = isScalarAllowed;
         }
 
-        public override int MaxOutputCharactersPerInputCharacter => throw new NotImplementedException();
+        public override int MaxOutputCharactersPerInputCharacter => 8; // "[10FFFF]".Length
 
-        public override unsafe int FindFirstCharacterToEncode(char* text, int textLength) => throw new NotImplementedException();
+        public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
+            => FindFirstCharacterToEncode(new ReadOnlySpan<char>(text, textLength));
+
+        private int FindFirstCharacterToEncode(ReadOnlySpan<char> span)
+        {
+            int originalLength = span.Length;
+
+            while (!span.IsEmpty)
+            {
+                if (!TryGetNextScalarValue(span, out int scalarValue) || !_isScalarAllowed(scalarValue))
+                {
+                    return originalLength - span.Length; // couldn't extract scalar or failed predicate
+                }
+
+                span = span.Slice(UnicodeUtility.GetUtf16SequenceLength((uint)scalarValue));
+            }
+
+            return -1; // entire span was consumed
+        }
+
+        private static bool TryGetNextScalarValue(ReadOnlySpan<char> span, out int scalarValue)
+        {
+            if (!span.IsEmpty)
+            {
+                // non-surrogate char?
+                char firstChar = span[0];
+                if (!char.IsSurrogate(firstChar))
+                {
+                    scalarValue = firstChar;
+                    return true;
+                }
+
+                // well-formed surrogate pair?
+                if (char.IsHighSurrogate(firstChar))
+                {
+                    if (span.Length > 1)
+                    {
+                        char secondChar = span[1];
+                        if (char.IsLowSurrogate(secondChar))
+                        {
+                            scalarValue = char.ConvertToUtf32(firstChar, secondChar);
+                            return true;
+                        }
+                    }
+                }
+            }
+
+            // if we got to this point, span was empty or ill-formed surrogate found
+            scalarValue = default;
+            return false;
+        }
 
         public override bool WillEncode(int unicodeScalar) => !_isScalarAllowed(unicodeScalar);
 
diff --git a/src/libraries/System.Text.Encodings.Web/tests/HtmlEncoderTests.cs b/src/libraries/System.Text.Encodings.Web/tests/HtmlEncoderTests.cs
index 074bb3977cd30..36eff3f080a7e 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/HtmlEncoderTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/HtmlEncoderTests.cs
@@ -140,7 +140,7 @@ public void HtmlEncode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
                         {
                             mustEncode = true; // control char
                         }
-                        else if (!UnicodeHelpers.IsCharacterDefined((char)i))
+                        else if (!UnicodeTestHelpers.IsCharacterDefined((char)i))
                         {
                             mustEncode = true; // undefined (or otherwise disallowed) char
                         }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.Relaxed.cs b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.Relaxed.cs
index b4a9f7d682f09..84b53ae56bcb9 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.Relaxed.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.Relaxed.cs
@@ -129,7 +129,7 @@ public void JavaScriptStringEncode_Relaxed_StillEncodesForbiddenChars_Extended()
                         {
                             mustEncode = true; // control char
                         }
-                        else if (!UnicodeHelpers.IsCharacterDefined((char)i))
+                        else if (!UnicodeTestHelpers.IsCharacterDefined((char)i))
                         {
                             mustEncode = true; // undefined (or otherwise disallowed) char
                         }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
index e918461f79b38..d88ab88b0c25b 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
@@ -694,7 +694,7 @@ public void JavaScriptStringEncode_AllRangesAllowed_StillEncodesForbiddenChars_E
                         {
                             mustEncode = true; // control char
                         }
-                        else if (!UnicodeHelpers.IsCharacterDefined((char)i))
+                        else if (!UnicodeTestHelpers.IsCharacterDefined((char)i))
                         {
                             mustEncode = true; // undefined (or otherwise disallowed) char
                         }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/ScalarTestEncoder.cs b/src/libraries/System.Text.Encodings.Web/tests/ScalarTestEncoder.cs
index 3481e50b31ace..2502a3d292ed1 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/ScalarTestEncoder.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/ScalarTestEncoder.cs
@@ -3,8 +3,6 @@
 
 using System;
 using System.Globalization;
-using System.IO;
-using System.Runtime.CompilerServices;
 
 namespace System.Text.Encodings.Web.Tests
 {
@@ -20,7 +18,7 @@ public sealed class ScalarTestEncoder : TextEncoder
         /// </summary>
         public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
         {
-            return text == null ? -1 : 0;
+            return (textLength == 0) ? -1 : 0;
         }
 
         /// <summary>
@@ -44,12 +42,16 @@ public override int MaxOutputCharactersPerInputCharacter
         /// </summary>
         public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
         {
-            fixed (char* chars = unicodeScalar.ToString("X8"))
-                for (int i = 0; i < Int32Length; i++)
-                    buffer[i] = chars[i];
-
-            numberOfCharactersWritten = Int32Length;
-            return true;
+            if (unicodeScalar.ToString("X8", CultureInfo.InvariantCulture).AsSpan().TryCopyTo(new Span<char>(buffer, bufferLength)))
+            {
+                numberOfCharactersWritten = Int32Length;
+                return true;
+            }
+            else
+            {
+                numberOfCharactersWritten = 0;
+                return false;
+            }
         }
     }
 }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj b/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj
index 5ad0343d6c04e..ae495e75a0c09 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj
+++ b/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj
@@ -14,6 +14,7 @@
     <Compile Include="..\src\System\Text\Unicode\UnicodeHelpers.cs" />
     <Compile Include="..\src\System\Text\Unicode\UnicodeHelpers.generated.cs" />
     <Compile Include="AllowedCharsBitmapTests.cs" />
+    <Compile Include="TextEncoderBatteryTests.cs" />
     <Compile Include="TextEncoderTests.cs" />
     <Compile Include="ConfigurableScalarTextEncoder.cs" />
     <Compile Include="ScalarTestEncoder.cs" />
@@ -38,6 +39,7 @@
     <Compile Include="UnicodeRangesTests.cs" />
     <Compile Include="UnicodeRangesTests.generated.cs" />
     <Compile Include="UnicodeRangeTests.cs" />
+    <Compile Include="UnicodeTestHelpers.cs" />
     <Compile Include="UrlEncoderTests.cs" />
   </ItemGroup>
   <ItemGroup>
@@ -54,6 +56,7 @@
              Link="Common\System\HexConverter.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(TargetFramework)' == 'net461'">
+    <PackageReference Include="System.ValueTuple" Version="$(SystemValueTupleVersion)" />
     <ProjectReference Include="..\src\System.Text.Encodings.Web.csproj" />
   </ItemGroup>
 </Project>
diff --git a/src/libraries/System.Text.Encodings.Web/tests/TextEncoderBatteryTests.cs b/src/libraries/System.Text.Encodings.Web/tests/TextEncoderBatteryTests.cs
new file mode 100644
index 0000000000000..5a8f3e6a2df4d
--- /dev/null
+++ b/src/libraries/System.Text.Encodings.Web/tests/TextEncoderBatteryTests.cs
@@ -0,0 +1,241 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using Xunit;
+
+namespace System.Text.Encodings.Web.Tests
+{
+    public class TextEncoderBatteryTests
+    {
+        private static TextEncoder GetBatteryTextEncoder()
+        {
+            // only even-valued scalars are allowed; odd-valued scalars are disallowed
+            return new ConfigurableScalarTextEncoder(scalarValue => scalarValue % 2 == 0);
+        }
+
+        // 2 elements: [0] = input data (string), [1] = expected output data (string)
+        public static IEnumerable<object[]> TestData()
+        {
+            static IEnumerable<(string input, string output)> RealTestData()
+            {
+                yield return ("", "");
+                yield return ("xyz", "x[0079]z");
+                yield return ("bdf", "bdf");
+                yield return ("bdfbdfbdfbdfbdf", "bdfbdfbdfbdfbdf");
+                yield return ("\U0001F600" /* grinning face */, "\U0001F600"); // not escaped since scalar value is even
+                yield return ("\U0001F601" /* grinning face with smiling eyes */, "[1F601]"); // escaped since scalar value is odd
+                yield return ("\U0001F3C0\U0001F3C1\U0001F3C2\U0001F3C3\U0001F3C4" /* various sports emoji */,
+                    "\U0001F3C0[1F3C1]\U0001F3C2[1F3C3]\U0001F3C4");
+                yield return ("bd\ud800fh", "bd[FFFD]fh"); // standalone high surrogate char
+                yield return ("bd\udffffh", "bd[FFFD]fh"); // standalone low surrogate char
+                yield return ("bd\ue000fh", "bd\ue000fh");
+                yield return ("bd\ue001fh", "bd[E001]fh");
+                yield return ("bd\udfd0\ud83c\udfd0\ud83cfh", "bd[FFFD]\U0001F3D0[FFFD]fh"); // U+1F3D0 VOLLEYBALL
+                yield return ("bd\udfd1\ud83c\udfd1\ud83cfh", "bd[FFFD][1F3D1][FFFD]fh"); // U+1F3D1 FIELD HOCKEY STICK AND BALL
+                yield return ("\ufffd\ud800\ufffd", "[FFFD][FFFD][FFFD]"); // U+FFFD itself is escaped since its scalar value is odd
+                yield return ("xyz\ud800", "x[0079]z[FFFD]"); // ends with standalone high surrogate char
+                yield return ("xyz\udfff", "x[0079]z[FFFD]"); // ends with standalone low surrogate char
+                yield return ("xyz\U0001F3C0", "x[0079]z\U0001F3C0"); // ends with valid surrogate pair
+
+                // really long input which does not need to be escaped
+                {
+                    StringBuilder sb = new StringBuilder();
+
+                    for (int i = 0x40; i < 0x4000; i += 2)
+                    {
+                        sb.Append((char)i);
+                    }
+
+                    yield return (sb.ToString(), sb.ToString());
+                }
+
+                // really long input which needs to be escaped
+                {
+                    StringBuilder sbInput = new StringBuilder();
+                    StringBuilder sbOutput = new StringBuilder();
+
+                    for (int i = 0x40; i < 0x4000; i++)
+                    {
+                        sbInput.Append((char)i);
+                        if (i % 2 == 0)
+                        {
+                            sbOutput.Append((char)i);
+                        }
+                        else
+                        {
+                            sbOutput.AppendFormat(CultureInfo.InvariantCulture, "[{0:X4}]", i);
+                        }
+                    }
+
+                    yield return (sbInput.ToString(), sbOutput.ToString());
+                }
+
+                // really long input which contains surrogate chars (no escape needed)
+                // the second yielded case prepends 'x' so every surrogate pair is shifted by one char,
+                // to account for the TextEncoder inner loop's "needs more data" handling logic.
+                {
+                    StringBuilder sb = new StringBuilder();
+
+                    for (int i = 0x10000; i < 0x14000; i += 2)
+                    {
+                        sb.Append(char.ConvertFromUtf32(i));
+                    }
+
+                    yield return (sb.ToString(), sb.ToString());
+                    yield return ("x" + sb.ToString(), "x" + sb.ToString());
+                }
+            }
+
+            foreach ((string input, string output) in RealTestData())
+            {
+                yield return new[] { Escape(input), Escape(output) };
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void Encode_String(string input, string expectedOutput)
+        {
+            input = Unescape(input);
+            expectedOutput = Unescape(expectedOutput);
+
+            // Arrange
+
+            TextEncoder encoder = GetBatteryTextEncoder();
+
+            // Act
+
+            string actualOutput = encoder.Encode(input);
+
+            // Assert
+
+            Assert.Equal(expectedOutput, actualOutput);
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void Encode_TextWriter_String(string input, string expectedOutput)
+        {
+            input = Unescape(input);
+            expectedOutput = Unescape(expectedOutput);
+
+            // Arrange
+
+            TextEncoder encoder = GetBatteryTextEncoder();
+            StringWriter writer = new StringWriter();
+
+            // Act
+
+            encoder.Encode(writer, input);
+
+            // Assert
+
+            Assert.Equal(expectedOutput, writer.ToString());
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void Encode_TextWriter_String_WithOffset(string input, string expectedOutput)
+        {
+            input = Unescape(input);
+            expectedOutput = Unescape(expectedOutput);
+
+            // Arrange
+
+            TextEncoder encoder = GetBatteryTextEncoder();
+            StringWriter writer;
+
+            // Act & assert - 1
+
+            writer = new StringWriter();
+            encoder.Encode(writer, input, 0, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+
+            // Act & assert - 2
+
+            writer = new StringWriter();
+            encoder.Encode(writer, "xxx" + input + "yyy", 3, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+
+            // Act & assert - 3
+
+            writer = new StringWriter();
+            encoder.Encode(writer, "\ud800" + input + "\udfff", 1, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+        }
+
+        [Theory]
+        [MemberData(nameof(TestData))]
+        public void Encode_TextWriter_CharArray_WithOffset(string input, string expectedOutput)
+        {
+            input = Unescape(input);
+            expectedOutput = Unescape(expectedOutput);
+
+            // Arrange
+
+            TextEncoder encoder = GetBatteryTextEncoder();
+            StringWriter writer;
+
+            // Act & assert - 1
+
+            writer = new StringWriter();
+            encoder.Encode(writer, input.ToCharArray(), 0, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+
+            // Act & assert - 2
+
+            writer = new StringWriter();
+            encoder.Encode(writer, ("xxx" + input + "yyy").ToCharArray(), 3, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+
+            // Act & assert - 3
+
+            writer = new StringWriter();
+            encoder.Encode(writer, ("\ud800" + input + "\udfff").ToCharArray(), 1, input.Length);
+            Assert.Equal(expectedOutput, writer.ToString());
+        }
+
+        /*
+         * ESCAPING & UNESCAPING
+         * =====================
+         *
+         * The xunit runner doesn't like strings that contain malformed UTF-16 data.
+         * To smuggle malformed UTF-16 data across the test runner, we'll encode each surrogate
+         * code unit as @XXXX (4 hex digits) and a literal '@' as @0040. A supplementary char is
+         * thus represented as @XXXX@YYYY (10 chars total) in the stream.
+         */
+
+        private static string Escape(string value)
+        {
+            value = value.Replace(@"@", @"@0040");
+            StringBuilder sb = new StringBuilder(value.Length);
+            foreach (char ch in value)
+            {
+                sb.Append(char.IsSurrogate(ch) ? FormattableString.Invariant($@"@{(int)ch:X4}") : ch);
+            }
+            return sb.ToString();
+        }
+
+        private static string Unescape(string value)
+        {
+            StringBuilder sb = new StringBuilder(value.Length);
+            for (int i = 0; i < value.Length; i++)
+            {
+                char ch = value[i];
+                if (ch != '@')
+                {
+                    sb.Append(ch);
+                }
+                else
+                {
+                    sb.Append((char)ushort.Parse(value.Substring(i + 1, 4), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture));
+                    i += 4;
+                }
+            }
+            return sb.ToString();
+        }
+    }
+}
diff --git a/src/libraries/System.Text.Encodings.Web/tests/TextEncoderTests.cs b/src/libraries/System.Text.Encodings.Web/tests/TextEncoderTests.cs
index e841e746c505a..7c49984450456 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/TextEncoderTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/TextEncoderTests.cs
@@ -179,7 +179,7 @@ public void EncodeUtf8_MixedInputWhichRequiresEncodingOrReplacement()
                 {
                     destination = new byte[destinationLength];
 
-                        Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: false));
+                    Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: false));
                     Assert.Equal(aggregateInputBytesSoFar.Count, bytesConsumed);
                     Assert.Equal(expectedOutputBytesSoFar.Count, bytesWritten);
                     Assert.Equal(expectedOutputBytesSoFar.ToArray(), new Span<byte>(destination, 0, expectedOutputBytesSoFar.Count).ToArray());
@@ -275,5 +275,87 @@ public void FindFirstCharToEncodeUtf8_IllFormedData_ReturnsIndexOfIllFormedSubse
 
             Assert.Equal(expectedIndex, actualIndex);
         }
+
+        [Theory]
+        [InlineData("", 0, "", 0, OperationStatus.Done)]
+        [InlineData("", 20, "", 0, OperationStatus.Done)]
+        [InlineData("ABC", 0, "", 0, OperationStatus.DestinationTooSmall)]
+        [InlineData("ABC", 2, "AB", 2, OperationStatus.DestinationTooSmall)]
+        [InlineData("ABC", 3, "ABC", 3, OperationStatus.Done)]
+        [InlineData("ABC", 30, "ABC", 3, OperationStatus.Done)]
+        [InlineData("ABC+DEF", 3, "ABC", 3, OperationStatus.DestinationTooSmall)]
+        [InlineData("ABC+DEF", 8, "ABC", 3, OperationStatus.DestinationTooSmall)]
+        [InlineData("ABC+DEF", 9, "ABC[002B]", 4, OperationStatus.DestinationTooSmall)]
+        [InlineData("ABC+DEF", 12, "ABC[002B]DEF", 7, OperationStatus.Done)]
+        public void EncodeUtf16_OperationStatus_AlphaNumericOnly(string input, int destBufferSize, string expectedOutput, int expectedCharsConsumed, OperationStatus expectedResult)
+        {
+            // Arrange: OR-ing 0x20 folds 'A'-'Z' onto 'a'-'z', so only [A-Za-z] is left unescaped.
+
+            var lettersOnlyEncoder = new ConfigurableScalarTextEncoder(scalar => UnicodeUtility.IsInRangeInclusive((uint)scalar | 0x20, 'a', 'z'));
+            using BoundedMemory<char> source = BoundedMemory.AllocateFromExistingData<char>(input.AsSpan());
+            using BoundedMemory<char> destination = BoundedMemory.Allocate<char>(destBufferSize);
+
+            // Act: encode into a destination that holds exactly destBufferSize chars.
+
+            OperationStatus status = lettersOnlyEncoder.Encode(source.Span, destination.Span, out int charsConsumed, out int charsWritten);
+
+            // Assert: the status, the amount of input consumed, and the written prefix of the destination.
+
+            Assert.Equal(expectedResult, status);
+            Assert.Equal(expectedCharsConsumed, charsConsumed);
+            Assert.Equal(expectedOutput, destination.Span.Slice(0, charsWritten).ToString());
+        }
+
+        [Theory]
+        [InlineData("ABC\U0001F600", 4, "ABC", 3, OperationStatus.DestinationTooSmall)] // never split a surrogate pair: write both halves or neither
+        [InlineData("ABC\U0001F600", 5, "ABC\U0001F600", 5, OperationStatus.Done)]
+        public void EncodeUtf16_OperationStatus_AllowEverything(string input, int destBufferSize, string expectedOutput, int expectedCharsConsumed, OperationStatus expectedResult)
+        {
+            // Arrange: the predicate accepts every well-formed scalar, so nothing here needs escaping.
+
+            var passThroughEncoder = new ConfigurableScalarTextEncoder(_ => true);
+            using BoundedMemory<char> source = BoundedMemory.AllocateFromExistingData<char>(input.AsSpan());
+            using BoundedMemory<char> destination = BoundedMemory.Allocate<char>(destBufferSize);
+
+            // Act: encode into a destination that holds exactly destBufferSize chars.
+
+            OperationStatus status = passThroughEncoder.Encode(source.Span, destination.Span, out int charsConsumed, out int charsWritten);
+
+            // Assert: the status, the amount of input consumed, and the written prefix of the destination.
+
+            Assert.Equal(expectedResult, status);
+            Assert.Equal(expectedCharsConsumed, charsConsumed);
+            Assert.Equal(expectedOutput, destination.Span.Slice(0, charsWritten).ToString());
+        }
+
+        [Theory]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 2, true, "AB", 2, OperationStatus.DestinationTooSmall)]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 2, false, "AB", 2, OperationStatus.NeedMoreData)]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 3, true, "AB", 2, OperationStatus.DestinationTooSmall)]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 3, false, "AB", 2, OperationStatus.NeedMoreData)]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 10, true, "AB[FFFD]", 3, OperationStatus.Done)]
+        [InlineData(new[] { 'A', 'B', '\ud83d' }, 10, false, "AB", 2, OperationStatus.NeedMoreData)]
+        [InlineData(new[] { 'A', 'B', '\ud83d', '\ude00' }, 2, true, "AB", 2, OperationStatus.DestinationTooSmall)]
+        [InlineData(new[] { 'A', 'B', '\ud83d', '\ude00' }, 2, false, "AB", 2, OperationStatus.DestinationTooSmall)]
+        [InlineData(new[] { 'A', 'B', '\ud83d', '\ude00' }, 4, true, "AB\U0001F600", 4, OperationStatus.Done)]
+        [InlineData(new[] { 'A', 'B', '\ud83d', '\ude00' }, 4, false, "AB\U0001F600", 4, OperationStatus.Done)]
+        public void EncodeUtf16_OperationStatus_SurrogateHandlingEdgeCases(char[] input, int destBufferSize, bool isFinalBlock, string expectedOutput, int expectedCharsConsumed, OperationStatus expectedResult)
+        {
+            // Arrange: the predicate accepts every well-formed scalar; the input arrives as a raw char[].
+
+            var passThroughEncoder = new ConfigurableScalarTextEncoder(_ => true);
+            using BoundedMemory<char> source = BoundedMemory.AllocateFromExistingData(input);
+            using BoundedMemory<char> destination = BoundedMemory.Allocate<char>(destBufferSize);
+
+            // Act: same call as the other status tests, but with isFinalBlock passed through explicitly.
+
+            OperationStatus status = passThroughEncoder.Encode(source.Span, destination.Span, out int charsConsumed, out int charsWritten, isFinalBlock);
+
+            // Assert: the status, the amount of input consumed, and the written prefix of the destination.
+
+            Assert.Equal(expectedResult, status);
+            Assert.Equal(expectedCharsConsumed, charsConsumed);
+            Assert.Equal(expectedOutput, destination.Span.Slice(0, charsWritten).ToString());
+        }
     }
 }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBase.cs b/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBase.cs
index 7e9a30b6aa184..77bb8410c74bb 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBase.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBase.cs
@@ -190,7 +190,22 @@ private void EncodeCore(ref Writer writer, char* input, uint charsRemaining)
         {
             while (charsRemaining != 0)
             {
-                int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(input, endOfString: (charsRemaining == 1));
+                int nextScalar = 0xFFFD; // Unicode replacement char
+
+                char nextChar = input[0];
+                if (!char.IsSurrogate(nextChar))
+                {
+                    nextScalar = nextChar;
+                }
+                else if (char.IsHighSurrogate(nextChar) && charsRemaining > 1)
+                {
+                    char followingChar = input[1];
+                    if (char.IsLowSurrogate(followingChar))
+                    {
+                        nextScalar = char.ConvertToUtf32(nextChar, followingChar);
+                    }
+                }
+
                 if (UnicodeHelpers.IsSupplementaryCodePoint(nextScalar))
                 {
                     // Supplementary characters should always be encoded numerically.
diff --git a/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBaseTests.cs b/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBaseTests.cs
index c72cc67b668ba..1db46b53e03e8 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBaseTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/UnicodeEncoderBaseTests.cs
@@ -89,7 +89,7 @@ public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
                     {
                         mustEncode = true; // control char
                     }
-                    else if (!UnicodeHelpers.IsCharacterDefined((char)i))
+                    else if (!UnicodeTestHelpers.IsCharacterDefined((char)i))
                     {
                         mustEncode = true; // undefined (or otherwise disallowed) char
                     }
diff --git a/src/libraries/System.Text.Encodings.Web/tests/UnicodeHelpersTests.cs b/src/libraries/System.Text.Encodings.Web/tests/UnicodeHelpersTests.cs
index 7b7f115640174..a20703f50241a 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/UnicodeHelpersTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/UnicodeHelpersTests.cs
@@ -20,68 +20,6 @@ public unsafe class UnicodeHelpersTests
 
         private static readonly UTF8Encoding _utf8EncodingThrowOnInvalidBytes = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
 
-        // To future refactorers:
-        // The following GetScalarValueFromUtf16_* tests must not be done as a [Theory].  If done via [InlineData], the invalid
-        // code points will get sanitized with replacement characters before they even reach the test, as the strings are parsed
-        // from the attributes in reflection.  And if done via [MemberData], the XmlWriter used by xunit will throw exceptions
-        // when it attempts to write out the test arguments, due to the invalid text.
-
-        [Fact]
-        public void GetScalarValueFromUtf16_NormalBMPChar_EndOfString()
-        {
-            GetScalarValueFromUtf16("a", 'a');
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_NormalBMPChar_NotEndOfString()
-        {
-            GetScalarValueFromUtf16("ab", 'a');
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_TrailingSurrogate_EndOfString()
-        {
-            GetScalarValueFromUtf16("\uDFFF", UnicodeReplacementChar);
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_TrailingSurrogate_NotEndOfString()
-        {
-            GetScalarValueFromUtf16("\uDFFFx", UnicodeReplacementChar);
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_LeadingSurrogate_EndOfString()
-        {
-            GetScalarValueFromUtf16("\uD800", UnicodeReplacementChar);
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_LeadingSurrogate_NotEndOfString()
-        {
-            GetScalarValueFromUtf16("\uD800x", UnicodeReplacementChar);
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_LeadingSurrogate_NotEndOfString_FollowedByLeadingSurrogate()
-        {
-            GetScalarValueFromUtf16("\uD800\uD800", UnicodeReplacementChar);
-        }
-
-        [Fact]
-        public void GetScalarValueFromUtf16_LeadingSurrogate_NotEndOfString_FollowedByTrailingSurrogate()
-        {
-            GetScalarValueFromUtf16("\uD800\uDFFF", 0x103FF);
-        }
-
-        private void GetScalarValueFromUtf16(string input, int expectedResult)
-        {
-            fixed (char* pInput = input)
-            {
-                Assert.Equal(expectedResult, UnicodeHelpers.GetScalarValueFromUtf16(pInput, endOfString: (input.Length == 1)));
-            }
-        }
-
         [Fact]
         public void GetUtf8RepresentationForScalarValue()
         {
@@ -111,7 +49,7 @@ public void GetUtf8RepresentationForScalarValue()
         [Fact]
         public void IsCharacterDefined()
         {
-            Assert.All(ReadListOfDefinedCharacters().Select((defined, idx) => new { defined, idx }), c => Assert.Equal(c.defined, UnicodeHelpers.IsCharacterDefined((char)c.idx)));
+            Assert.All(ReadListOfDefinedCharacters().Select((defined, idx) => new { defined, idx }), c => Assert.Equal(c.defined, UnicodeTestHelpers.IsCharacterDefined((char)c.idx)));
         }
 
         private static bool[] ReadListOfDefinedCharacters()
diff --git a/src/libraries/System.Text.Encodings.Web/tests/UnicodeTestHelpers.cs b/src/libraries/System.Text.Encodings.Web/tests/UnicodeTestHelpers.cs
new file mode 100644
index 0000000000000..e69d7156b2221
--- /dev/null
+++ b/src/libraries/System.Text.Encodings.Web/tests/UnicodeTestHelpers.cs
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Text.Unicode
+{
+    internal static class UnicodeTestHelpers
+    {
+        /// <summary>
+        /// Test-only lookup of whether <paramref name="c"/> is flagged as defined in the bitmap
+        /// from <see cref="UnicodeHelpers.GetDefinedCharacterBitmap"/>, which reflects the checked-in
+        /// version of the Unicode specification. Control chars, private use, surrogates, and some
+        /// whitespace are deliberately treated as "undefined" for our purposes.
+        /// </summary>
+        internal static bool IsCharacterDefined(char c)
+        {
+            // Each bitmap element carries the flags for 32 consecutive UTF-16 code units.
+            int wordIndex = c >> 5;
+            int bitIndex = c & 0x1F;
+            return ((UnicodeHelpers.GetDefinedCharacterBitmap()[wordIndex] >> bitIndex) & 0x1U) != 0;
+        }
+    }
+}
diff --git a/src/libraries/System.Text.Encodings.Web/tests/UrlEncoderTests.cs b/src/libraries/System.Text.Encodings.Web/tests/UrlEncoderTests.cs
index 46bb504eaeec6..70fc8ad62c10f 100644
--- a/src/libraries/System.Text.Encodings.Web/tests/UrlEncoderTests.cs
+++ b/src/libraries/System.Text.Encodings.Web/tests/UrlEncoderTests.cs
@@ -114,7 +114,7 @@ public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
                     }
                     else if ((0x00A0 <= i && i <= 0xD7FF) | (0xF900 <= i && i <= 0xFDCF) | (0xFDF0 <= i && i <= 0xFFEF))
                     {
-                        mustEncode = !UnicodeHelpers.IsCharacterDefined((char)i); // 'ucschar'
+                        mustEncode = !UnicodeTestHelpers.IsCharacterDefined((char)i); // 'ucschar'
                     }
                     else
                     {
diff --git a/src/libraries/libraries-packages.proj b/src/libraries/libraries-packages.proj
index 4bd094c79f66a..4cba0746c5bfc 100644
--- a/src/libraries/libraries-packages.proj
+++ b/src/libraries/libraries-packages.proj
@@ -23,6 +23,7 @@
     <!-- This is merge marker 1 to help automerge -->
     <ProjectReference Include="$(LibrariesProjectRoot)\System.Drawing.Common\pkg\System.Drawing.Common.pkgproj" Condition="'$(BuildAllConfigurations)' == 'true'" />
     <!-- This is merge marker 2 to help automerge --> 
+    <ProjectReference Include="$(LibrariesProjectRoot)\System.Text.Encodings.Web\pkg\System.Text.Encodings.Web.pkgproj" Condition="'$(BuildAllConfigurations)' == 'true'" />
     <!-- This is merge marker 3 to help automerge --> 
     <!-- This is merge marker 4 to help automerge --> 
     <!-- This is merge marker 5 to help automerge --> 
diff --git a/src/libraries/pkg/baseline/packageIndex.json b/src/libraries/pkg/baseline/packageIndex.json
index d129922956ab9..49111acbfe0d4 100644
--- a/src/libraries/pkg/baseline/packageIndex.json
+++ b/src/libraries/pkg/baseline/packageIndex.json
@@ -6440,21 +6440,28 @@
         "4.3.1",
         "4.4.0",
         "4.5.0",
-        "4.6.0",
-        "5.0.0"
+        "4.5.1",
+        "4.7.1",
+        "4.7.2",
+        "5.0.0",
+        "5.0.1"
       ],
-      "BaselineVersion": "5.0.0",
+      "BaselineVersion": "5.0.1",
       "InboxOn": {
         "netcoreapp3.0": "4.0.4.0",
-        "net5.0": "5.0.0.0"
+        "net5.0": "5.0.0.1"
       },
       "AssemblyVersionInPackageVersion": {
         "4.0.0.0": "4.0.0",
         "4.0.1.0": "4.3.0",
         "4.0.2.0": "4.4.0",
         "4.0.3.0": "4.5.0",
+        "4.0.3.1": "4.5.1",
         "4.0.4.0": "4.6.0",
-        "5.0.0.0": "5.0.0"
+        "4.0.5.0": "4.7.0",
+        "4.0.5.1": "4.7.2",
+        "5.0.0.0": "5.0.0",
+        "5.0.0.1": "5.0.1"
       }
     },
     "System.Text.Json": {