Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono] Fix assembly name parser to accommodate non-ASCII UTF8 strings #103363

Merged
merged 5 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,14 @@ public void TestTypeIdentifierAttribute()
Assert.Equal(42, mi.Invoke(null, args));
}

[Fact]
public void TestAssemblyNameWithInternationalChar()
{
    // Hello工程123.Program is referenced only for its assembly-qualified name,
    // which carries non-ASCII (CJK) characters. Resolving a type from that
    // string must succeed rather than return null.
    var qualifiedName = typeof(Hello工程123.Program).AssemblyQualifiedName;

    var resolved = Type.GetType(qualifiedName);

    Assert.NotNull(resolved);
}

[Fact]
public void IgnoreLeadingDotForTypeNamesWithoutNamespace()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using System;

namespace System.Reflection.Hello工程123
{
    /// <summary>
    /// Intentionally empty public type. It has no members; it exists so that
    /// other code can refer to a type declared in a namespace whose name
    /// contains non-ASCII characters.
    /// </summary>
    public class Program { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<!--
  Minimal SDK-style helper project: it compiles the single source file
  Hello工程123.cs for the $(NetCoreAppCurrent) target framework.
  NOTE(review): no AssemblyName is set here, so the output assembly presumably
  takes the project file name (including its non-ASCII characters); confirm
  against the repository's shared build props.
-->
<PropertyGroup>
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
</PropertyGroup>
<ItemGroup>
<Compile Include="Hello工程123.cs" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
<ProjectReference Include="UnloadableAssembly\UnloadableAssembly.csproj" />
<ProjectReference Include="TestExe\System.Reflection.TestExe.csproj" />
<ProjectReference Include="TestAssembly\TestAssembly.csproj" />
<ProjectReference Include="Hello工程123\Hello工程123.csproj" />
</ItemGroup>
<ItemGroup Condition="'$(TargetOS)' == 'browser'">
<WasmFilesToIncludeFromPublishDir Include="$(AssemblyName).dll" />
Expand All @@ -87,5 +88,6 @@

<!-- Assemblies that should be excluded from the bundle -->
<__ExcludeFromBundle Include="TestAssembly.dll" />
<__ExcludeFromBundle Include="Hello工程123.dll" />
</ItemGroup>
</Project>
1 change: 1 addition & 0 deletions src/mono/mono/eglib/eglib-remap.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
#define g_wtf8_to_utf16 monoeg_g_wtf8_to_utf16
#define g_utf8_to_utf16_custom_alloc monoeg_g_utf8_to_utf16_custom_alloc
#define g_utf8_to_utf16le_custom_alloc monoeg_g_utf8_to_utf16le_custom_alloc
#define g_utf8_validate_part monoeg_g_utf8_validate_part
#define g_utf8_validate monoeg_g_utf8_validate
#define g_vasprintf monoeg_g_vasprintf
#define g_assertion_disable_global monoeg_assertion_disable_global
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/eglib/glib.h
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,7 @@ g_async_safe_printf (gchar const *format, ...)
*/
extern const guchar g_utf8_jump_table[256];

gboolean g_utf8_validate_part (const unsigned char *inptr, size_t len);
gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
glong g_utf8_strlen (const gchar *str, gssize max);

Expand Down
8 changes: 4 additions & 4 deletions src/mono/mono/eglib/gutf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ const guchar g_utf8_jump_table[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

static gboolean
utf8_validate (const unsigned char *inptr, size_t len)
gboolean
g_utf8_validate_part (const unsigned char *inptr, size_t len)
{
const unsigned char *ptr = inptr + len;
unsigned char c;
Expand Down Expand Up @@ -105,7 +105,7 @@ g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
if (max_len < 0) {
while (*inptr != 0) {
length = g_utf8_jump_table[*inptr];
if (!utf8_validate (inptr, length)) {
if (!g_utf8_validate_part (inptr, length)) {
valid = FALSE;
break;
}
Expand All @@ -124,7 +124,7 @@ g_utf8_validate (const gchar *str, gssize max_len, const gchar **end)
length = g_utf8_jump_table[*inptr];
min = MIN (length, GSSIZE_TO_UINT (max_len - n));

if (!utf8_validate (inptr, min)) {
if (!g_utf8_validate_part (inptr, min)) {
valid = FALSE;
break;
}
Expand Down
15 changes: 12 additions & 3 deletions src/mono/mono/metadata/reflection.c
Original file line number Diff line number Diff line change
Expand Up @@ -1548,8 +1548,17 @@ assembly_name_to_aname (MonoAssemblyName *assembly, char *p)
}
assembly->name = p;
s = p;
while (*p && (isalnum (*p) || *p == '.' || *p == '-' || *p == '_' || *p == '$' || *p == '@' || g_ascii_isspace (*p)))
p++;
guchar *inptr = (guchar *) p;
while (*p && (*p != ',') && (*p != '\0')) {
if (quoted && (*p == '"'))
break;
guint length = g_utf8_jump_table[*inptr];
if (!g_utf8_validate_part (inptr, length)) {
lambdageek marked this conversation as resolved.
Show resolved Hide resolved
return 0;
}
p += length;
inptr += length;
}
if (quoted) {
if (*p != '"')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the quoted path will fail now?

Copy link
Member Author

@fanyang-mono fanyang-mono Jun 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does quoted path look like?

Copy link
Member

@lewing lewing Jun 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like there are some examples here https://github.com/adamsitnik/runtime/blob/b57b100933e306242ae422b9995b47fb3d778dbd/src/libraries/System.Runtime/tests/System.Reflection.Tests/AssemblyNameTests.cs#L37

but for the purposes of my comment I was just pointing out that if the name was quoted, your change would walk to the `,` rather than to the closing `"`, and then return 1 because `if (*p != '"')` was true

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea. @fanyang-mono I would suggest doing two separate loops for quoted and !quoted. if it's quoted, just take everything until the next close quote (I'm not sure if there's a possibility of escaping a quote). if !quoted, go until the first comma.


I can't actually find anything in the docs about quoted assembly names or about escaping quotes.

https://learn.microsoft.com/en-us/dotnet/api/system.reflection.assemblyname?view=net-8.0#remarks
https://learn.microsoft.com/en-us/dotnet/api/system.type.assemblyqualifiedname?view=net-8.0#remarks

Maybe this is some mono invention, or maybe it's backward compat for .NET Framework? need to check what CoreCLR does

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, just for clarity: if you really want to walk character by character, `p += g_utf8_jump_table[*p];` is how you could do it here, or you could do it with more validation, like https://github.com/adamsitnik/runtime/blob/4246ba19bd196c5f374d94e5c1fc7b21d53bd9fc/src/mono/mono/eglib/gutf8.c#L106-L113

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like these days CoreCLR's AssemblyNameParser, in TryGetNextToken, does try to handle single and double quotes and escape characters:

if (quoteChar != 0 && c == quoteChar)
break; // Terminate: Found closing quote of quoted string.
if (quoteChar == 0 && (c is ',' or '='))
{
_index--;
break; // Terminate: Found start of a new ',' or '=' token.
}
if (quoteChar == 0 && (c is '\'' or '\"'))
{
token = default;
return false;
}
if (c is '\\')
{
if (!TryGetNextChar(out c))
{
token = default;
return false;
}
switch (c)
{
case '\\':
case ',':
case '=':
case '\'':
case '"':
sb.Append(c);
break;
case 't':
sb.Append('\t');
break;
case 'r':
sb.Append('\r');
break;
case 'n':
sb.Append('\n');
break;
default:
token = default;
return false;
}
}
else
{
sb.Append(c);
}

Copy link
Member Author

@fanyang-mono fanyang-mono Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to share this logic between Mono and CoreCLR somehow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can probably share for most uses in reflection. The runtime also calls this internally from some places, and also I think the AOT compiler might need it. One idea is to compile the managed TypeNameParser as a library using NativeAOT and call it from the AOT compiler.

return 1;
Expand Down Expand Up @@ -1648,7 +1657,7 @@ assembly_name_to_aname (MonoAssemblyName *assembly, char *p)
found_sep = 1;
continue;
}
/* failed */
/* Done processing */
if (!found_sep)
return 1;
}
Expand Down
Loading