Skip to content

Commit

Permalink
Convert Perl scripts to C# in coreclr (#104867)
Browse files Browse the repository at this point in the history
* Convert Perl scripts to C# in coreclr

* Delete obsolete IL script

* Add license header and regenerate

* Add a README

* Mention generator in ilasm readme and cleanups
  • Loading branch information
am11 committed Jul 24, 2024
1 parent fcc916c commit 7847b75
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 667 deletions.
34 changes: 0 additions & 34 deletions src/coreclr/dlls/mscordac/update.pl

This file was deleted.

9 changes: 9 additions & 0 deletions src/coreclr/ilasm/GrammarExtractor/GrammarExtractor.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">

<!--
  Project file for the GrammarExtractor tool: builds an executable (OutputType=Exe)
  with nullable reference types enabled.
  NOTE(review): NetCoreAppToolCurrent is not defined in this file; presumably it is
  supplied by the repository's shared build properties. Confirm before building standalone.
-->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
98 changes: 98 additions & 0 deletions src/coreclr/ilasm/GrammarExtractor/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.IO;
using System.Text.RegularExpressions;
using static Patterns;

// Entry point, part 1: validate the command line, load the input file and isolate
// the grammar section that sits between the "%%" markers.
// Every failure path writes a message to stderr and exits with code 1.
if (args.Length != 1)
{
    Console.Error.WriteLine("Usage: <file>");
    return 1;
}

string filePath = args[0];

// Read the whole file up front; the marker regex needs to see all of it at once.
// A missing, unreadable or malformed path is reported like the other errors
// instead of surfacing as an unhandled exception with a stack trace.
string fileContent;
try
{
    fileContent = File.ReadAllText(filePath);
}
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or ArgumentException)
{
    Console.Error.WriteLine($"Could not read '{filePath}': {ex.Message}");
    return 1;
}

var match = GetRegexExtractMarkers().Match(fileContent);
if (!match.Success)
{
    Console.Error.WriteLine("Could not find %% markers");
    return 1;
}

// Group 1 (the text before the first marker) is not needed; group 2 is the
// text between the two markers, i.e. the grammar to be transformed.
string grammar = match.Groups[2].Value;

// Strip every {...} block from the grammar. A single pass only removes brace
// groups that contain no further braces, so repeat until a pass changes nothing;
// that peels nested groups from the inside out. The character captured in front
// of the opening brace is put back via "$1".
Regex bracedText = GetRegexRemoveTextInBraces();
while (true)
{
    string stripped = bracedText.Replace(grammar, "$1");
    if (stripped == grammar)
    {
        break;
    }

    grammar = stripped;
}

// Identifiers that end in an underscore become the quoted, lower-cased text in
// front of that underscore (FOO_ turns into 'foo').
Regex keywordIdentifiers = GetRegexKeywordIdentifiers();
grammar = keywordIdentifiers.Replace(
    grammar,
    static keyword => "'" + keyword.Groups[1].Value.ToLowerInvariant() + "'");

// Identifiers that start with an underscore become the quoted, lower-cased name
// with a leading period (_FOO turns into '.foo').
Regex assemblerDirectives = GetRegexAssemblerDirectives();
grammar = assemblerDirectives.Replace(
    grammar,
    static directive => "'." + directive.Groups[1].Value.ToLowerInvariant() + "'");

// Two punctuation tokens are spelled out as the literal text they stand for.
grammar = GetRegexEllipsis().Replace(grammar, "'...'");
grammar = GetRegexDcolon().Replace(grammar, "'::'");

// Fixed preamble of the generated output: license lines, the legend of lexical
// tokens and the START rule. It is a verbatim string, so its line breaks and
// doubled quotes are emitted exactly as written here.
const string outputHeader = @"// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
Lexical tokens
ID - C style alphaNumeric identifier (e.g. Hello_There2)
DOTTEDNAME - Sequence of dot-separated IDs (e.g. System.Object)
QSTRING - C style quoted string (e.g. ""hi\n"")
SQSTRING - C style singlely quoted string(e.g. 'hi')
INT32 - C style 32 bit integer (e.g. 235, 03423, 0x34FFF)
INT64 - C style 64 bit integer (e.g. -2353453636235234, 0x34FFFFFFFFFF)
FLOAT64 - C style floating point number (e.g. -0.2323, 354.3423, 3435.34E-5)
INSTR_* - IL instructions of a particular class (see opcode.def).
HEXBYTE - 1- or 2-digit hexadecimal number (e.g., A2, F0).
Auxiliary lexical tokens
TYPEDEF_T - Aliased class (TypeDef or TypeRef).
TYPEDEF_M - Aliased method.
TYPEDEF_F - Aliased field.
TYPEDEF_TS - Aliased type specification (TypeSpec).
TYPEDEF_MR - Aliased field/method reference (MemberRef).
TYPEDEF_CA - Aliased Custom Attribute.
----------------------------------------------------------------------------------
START : decls
;";

// Header first, then the transformed grammar. Both go through Console.Write,
// so no separator or trailing newline is added beyond what the text itself contains.
Console.Write(outputHeader);
Console.Write(grammar);

// Exit code 0 signals success to the caller.
return 0;

/// <summary>
/// Source-generated regular expressions used by the top-level program to carve the
/// grammar out of the input file and rewrite its tokens.
/// </summary>
internal static partial class Patterns
{
    // All patterns share the same options. Singleline lets '.' match newlines,
    // which only matters for the marker pattern; the others contain no '.'.
    private const RegexOptions SharedOptions = RegexOptions.Singleline;

    /// <summary>
    /// Matches from the start of the input through a pair of <c>%%</c> markers.
    /// Group 1 is the text before the first marker of the pair, group 2 the text
    /// between the two markers. Both groups are greedy, so with more than two
    /// markers in the input the match settles on the final pair.
    /// </summary>
    [GeneratedRegex(@"^(.*)%%(.*)%%", SharedOptions)]
    internal static partial Regex GetRegexExtractMarkers();

    /// <summary>
    /// Matches optional whitespace, one character that is not a single quote
    /// (group 1), and a <c>{...}</c> group that contains no nested braces.
    /// </summary>
    [GeneratedRegex(@"\s*([^'])\{[^{}]*\}", SharedOptions)]
    internal static partial Regex GetRegexRemoveTextInBraces();

    /// <summary>
    /// Matches an upper-case/digit/underscore identifier that ends in an underscore;
    /// group 1 is the identifier without that trailing underscore.
    /// </summary>
    [GeneratedRegex(@"\b([A-Z0-9_]+)_\b", SharedOptions)]
    internal static partial Regex GetRegexKeywordIdentifiers();

    /// <summary>
    /// Matches an upper-case/digit identifier that starts with an underscore;
    /// group 1 is the identifier without that leading underscore.
    /// </summary>
    [GeneratedRegex(@"\b_([A-Z0-9]+)\b", SharedOptions)]
    internal static partial Regex GetRegexAssemblerDirectives();

    /// <summary>Matches the whole word <c>ELLIPSIS</c>.</summary>
    [GeneratedRegex(@"\bELLIPSIS\b", SharedOptions)]
    internal static partial Regex GetRegexEllipsis();

    /// <summary>Matches the whole word <c>DCOLON</c>.</summary>
    [GeneratedRegex(@"\bDCOLON\b", SharedOptions)]
    internal static partial Regex GetRegexDcolon();
}
10 changes: 10 additions & 0 deletions src/coreclr/ilasm/GrammarExtractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Grammar extractor for IL tools

Tool that extracts the IL grammar in Backus-Naur Form (BNF) from a Yacc (Yet Another Compiler-Compiler) grammar file.

Usage:

```sh
cd runtime
./dotnet.sh run --project src/coreclr/ilasm/GrammarExtractor src/coreclr/ilasm/asmparse.y > src/coreclr/ilasm/prebuilt/asmparse.grammar
```
1 change: 1 addition & 0 deletions src/coreclr/ilasm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ $ docker run --rm -v$(pwd):/runtime -w /runtime/src/coreclr/ilasm alpine \
sh -c 'apk add bison && yacc asmparse.y -o prebuilt/asmparse.cpp'
```

To generate grammar, see [GrammarExtractor README](GrammarExtractor/README.md).
59 changes: 0 additions & 59 deletions src/coreclr/ilasm/extractGrammar.pl

This file was deleted.

6 changes: 3 additions & 3 deletions src/coreclr/ilasm/prebuilt/asmparse.grammar
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ dottedName : id
| dottedName '.' dottedName
;

int32 : INT32
int32 : INT32_V
;

int64 : INT64
| INT32
int64 : INT64_V
| INT32_V
;

float64 : FLOAT64
Expand Down
Loading

0 comments on commit 7847b75

Please sign in to comment.