Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate skipped files concept and refine eventing model. #2675

Merged
merged 10 commits into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ReleaseHistory.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SARIF Package Release History (SDK, Driver, Converters, and Multitool)
## **v4.2.0** UNRELEASED
* BRK: Change `ArtifactProvider.SizeInBytes` property type from `ulong` to `long`. [#2675](https://github.com/microsoft/sarif-sdk/pull/2675)
* BRK: Update `SarifLog.Post(Uri, StreamWriter, HttpClient)` return value to `HttpResponseMessage` (to make returned correlation id and error messages available). [#2672](https://github.com/microsoft/sarif-sdk/pull/2672)
* BRK: `RuntimeConditions` now of type `long` to permit more flag values. Many literal values have changed for individual members. [#2660](https://github.com/microsoft/sarif-sdk/pull/2660)
* BRK: `RuntimeConditions.OneOrMoreFilesSkippedDueToSize` renamed to `OneOrMoreFilesSkippedDueToExceedingSizeLimits`. [#2660](https://github.com/microsoft/sarif-sdk/pull/2660)
Expand All @@ -26,6 +27,8 @@
* BUG: Generate `IAnalysisLogger.AnalyzingTarget` callbacks from `MultithreadedAnalyzeCommandBase`. [#2637](https://github.com/microsoft/sarif-sdk/pull/2637)
* BUG: Persist `fileRegionsCache` parameter in `SarifLogger` to support retrieving hash data. [#2639](https://github.com/microsoft/sarif-sdk/pull/2639)
* BUG: Allow override of `FailureLevels` and `ResultKinds` in context objects. [#2639](https://github.com/microsoft/sarif-sdk/pull/2639)
* NEW: Add general `Notes.LogFileSkipped` notification mechanism for any skipped files. [#2675](https://github.com/microsoft/sarif-sdk/pull/2675)
* NEW: Add 50K files to analysis channel (rather than previous value of 25K). Smooths performance analyzing many small artifacts. [#2674](https://github.com/microsoft/sarif-sdk/pull/2674)
* NEW: Provide new ETW telemetry for runtime behavior, provider `SarifDriver`, guid `c84480b4-a77f-421f-8a11-48210c1724d4`. [#2668](https://github.com/microsoft/sarif-sdk/pull/2668)
* NEW: Provide convenience enumerator at the `SarifLog` level that iterates over all results in all runs in the log. [#2660](https://github.com/microsoft/sarif-sdk/pull/2660)
* NEW: Provide `Notes.LogEmptyFileSkipped` helper for reporting zero-byte files skipped at scan time. [#2660](https://github.com/microsoft/sarif-sdk/pull/2660)
Expand Down
117 changes: 69 additions & 48 deletions src/Sarif.Driver/DumpEventsCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
using System.IO;
using System.Text;

using Microsoft.CodeAnalysis.Sarif.Driver.Sdk;
using Microsoft.Diagnostics.Tracing;
using Microsoft.Diagnostics.Tracing.Analysis;

namespace Microsoft.CodeAnalysis.Sarif
{
Expand All @@ -17,14 +17,15 @@ public int Run(DumpEventsOptions options)
{
string path = options.EventsFilePath;

Guid guid = Guid.NewGuid();
var guid = Guid.NewGuid();
StreamWriter csvWriter = null;

Console.WriteLine($"Parsing events from {path} (this may take a while)...");

if (!string.IsNullOrWhiteSpace(options.CsvFilePath))
{
csvWriter = new StreamWriter(options.CsvFilePath);
csvWriter.WriteLine("SessionGuid,Timestamp,ThreadId,ProcessorId,EventName,TimeStampRelativeMSec,DurationMsec,FilePath,SizeInBytes,Level,RuleId,RuleName,Message");
csvWriter.WriteLine("SessionGuid,Timestamp,ThreadId,ProcessorId,EventName,TimeStampRelativeMSec,DurationMsec,FilePath,RuleId,RuleName,Data1,Data2,Message");
}

using (var source = new ETWTraceEventSource(path))
Expand All @@ -36,9 +37,9 @@ public int Run(DumpEventsOptions options)
TimeSpan timeSpentLoggingResults = default;
TimeSpan firstArtifactQueued = default;

ulong? sizeInBytes;
object data1, data2;

double? durationMsec;
FailureLevel? level;
string context, eventName, filePath, ruleId, ruleName;

var timingData = new Dictionary<StartStopKey, double>();
Expand All @@ -50,33 +51,30 @@ public int Run(DumpEventsOptions options)
{
eventName = traceEvent.EventName;
context = filePath = ruleId = ruleName = null;
sizeInBytes = null;
durationMsec = null;
level = null;

Guid correlationGuid = default;
data2 = data1 = durationMsec = null;

if (traceEvent.Opcode == TraceEventOpcode.Start)
{
correlationGuid = new Guid(traceEvent.ThreadID, 1, 5, 45, 23, 23, 3, 5, 5, 4, 5);
startStopKey = new StartStopKey(traceEvent.ProviderGuid, traceEvent.Task, correlationGuid);
string keyText = $"{traceEvent.PayloadByName(nameof(ruleId))}:{traceEvent.PayloadByName(nameof(ruleName))}:{traceEvent.ThreadID}";
startStopKey = new StartStopKey(traceEvent.ProviderGuid, traceEvent.Task, keyText);
timingData.Add(startStopKey, traceEvent.TimeStampRelativeMSec);
}

if (traceEvent.Opcode == TraceEventOpcode.Stop)
{
correlationGuid = new Guid(traceEvent.ThreadID, 1, 5, 45, 23, 23, 3, 5, 5, 4, 5);
startStopKey = new StartStopKey(traceEvent.ProviderGuid, traceEvent.Task, correlationGuid);
string keyText = $"{traceEvent.PayloadByName(nameof(ruleId))}:{traceEvent.PayloadByName(nameof(ruleName))}:{traceEvent.ThreadID}";
startStopKey = new StartStopKey(traceEvent.ProviderGuid, traceEvent.Task, keyText);
}

string formattedMessage = CsvEscape(traceEvent.FormattedMessage);

string formattedMessage = traceEvent.FormattedMessage.CsvEscape();
data1 = (string)traceEvent.PayloadByName(nameof(data1));
data2 = (string)traceEvent.PayloadByName(nameof(data2));
switch (traceEvent.EventName)
{
case "ArtifactSizeInBytes":
case DriverEventNames.ArtifactNotScanned:
{
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
sizeInBytes = (ulong)traceEvent.PayloadByName(nameof(sizeInBytes));
data1 = traceEvent.PayloadByName("sizeInBytes");
break;
}

Expand Down Expand Up @@ -146,13 +144,13 @@ public int Run(DumpEventsOptions options)
break;
}

case "ReadArtifact/Start":
case DriverEventNames.ReadArtifactStart:
{
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
break;
}

case "ReadArtifact/Stop":
case DriverEventNames.ReadArtifactStop:
{
filePath = (string)traceEvent.PayloadByName(nameof(filePath));

Expand All @@ -164,7 +162,8 @@ public int Run(DumpEventsOptions options)

case "RuleFired":
{
level = (FailureLevel)(uint)traceEvent.PayloadByName(nameof(level));
data1 = (FailureLevel)(int)(uint)traceEvent.PayloadByName("level");
data2 = traceEvent.PayloadByName("matchIdentifier");
ruleId = (string)traceEvent.PayloadByName(nameof(ruleId));
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
ruleName = (string)traceEvent.PayloadByName(nameof(ruleName));
Expand All @@ -173,28 +172,31 @@ public int Run(DumpEventsOptions options)

case "RuleReserved0":
{
eventName = (string)traceEvent.PayloadByName("context");

ruleId = (string)traceEvent.PayloadByName(nameof(ruleId));
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
ruleName = (string)traceEvent.PayloadByName(nameof(ruleName));
eventName = (string)traceEvent.PayloadByName("context");
break;
}

case "RuleReserved1/Start":
{
eventName = $"{(string)traceEvent.PayloadByName("context")}/Start";

ruleId = (string)traceEvent.PayloadByName(nameof(ruleId));
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
ruleName = (string)traceEvent.PayloadByName(nameof(ruleName));
eventName = $"{(string)traceEvent.PayloadByName("context")}/Start";
break;
}

case "RuleReserved1/Stop":
{
eventName = $"{(string)traceEvent.PayloadByName("context")}/Stop";

ruleId = (string)traceEvent.PayloadByName(nameof(ruleId));
filePath = (string)traceEvent.PayloadByName(nameof(filePath));
ruleName = (string)traceEvent.PayloadByName(nameof(ruleName));
eventName = $"{(string)traceEvent.PayloadByName("context")}/Stop";

durationMsec = traceEvent.TimeStampRelativeMSec - timingData[startStopKey];
timingData.Remove(startStopKey);
Expand Down Expand Up @@ -267,11 +269,11 @@ public int Run(DumpEventsOptions options)

if (csvWriter != null)
{
filePath = CsvEscape(filePath);
filePath = filePath.CsvEscape();
csvWriter.WriteLine(
$"{guid},{traceEvent.TimeStamp:MM/dd/yyyy hh:mm:ss.ffff}, {traceEvent.ThreadID},{traceEvent.ProcessorNumber}," +
$"{eventName},{traceEvent.TimeStampRelativeMSec},{durationMsec}," +
$"{filePath},{sizeInBytes},{level},{ruleId},{ruleName},{formattedMessage}");
$"{guid},{traceEvent.TimeStamp:MM/dd/yyyy hh:mm:ss.ffff}, {traceEvent.ThreadID}," +
$"{traceEvent.ProcessorNumber},{eventName},{traceEvent.TimeStampRelativeMSec}," +
$"{durationMsec},{filePath},{ruleId},{ruleName},{data1},{data2},{formattedMessage}");
}
};

Expand All @@ -294,6 +296,7 @@ public int Run(DumpEventsOptions options)

DumpCustomTimingData(artifactReservedTiming);
}

return 0;
}

Expand All @@ -317,36 +320,54 @@ private void DumpCustomTimingData(Dictionary<string, double> timingData)
}
}

private readonly static StringBuilder s_converted = new StringBuilder();
public static string CsvEscape(string value)
private static Guid GenerateGuidFromText(string name)
{
if (string.IsNullOrEmpty(value)) { return string.Empty; }

s_converted.Clear();
s_converted.Append('"');

int copiedTo = 0;
while (true)
// The algorithm below is following the guidance of http://www.ietf.org/rfc/rfc4122.txt
// Create a blob containing a 16 byte number representing the namespace
// followed by the unicode bytes in the name.
byte[] bytes = new byte[name.Length * 2 + 16];
uint namespace1 = 0x482C2DB2;
uint namespace2 = 0xC39047c8;
uint namespace3 = 0x87F81A15;
uint namespace4 = 0xBFC130FB;
// Write the bytes most-significant byte first.
for (int i = 3; 0 <= i; --i)
{
int nextQuote = value.IndexOf('"', copiedTo);
if (nextQuote == -1) { break; }

s_converted.Append(value, copiedTo, nextQuote - copiedTo + 1);
s_converted.Append('"');
copiedTo = nextQuote + 1;
bytes[i] = (byte)namespace1;
namespace1 >>= 8;
bytes[i + 4] = (byte)namespace2;
namespace2 >>= 8;
bytes[i + 8] = (byte)namespace3;
namespace3 >>= 8;
bytes[i + 12] = (byte)namespace4;
namespace4 >>= 8;
}
// Write out the name, most significant byte first
for (int i = 0; i < name.Length; i++)
{
bytes[2 * i + 16 + 1] = (byte)name[i];
bytes[2 * i + 16] = (byte)(name[i] >> 8);
}

if (copiedTo < value.Length) { s_converted.Append(value, copiedTo, value.Length - copiedTo); }
s_converted.Append('"');
// Compute the Sha1 hash
var sha1 = System.Security.Cryptography.SHA1.Create(); // lgtm [cs/weak-crypto]
byte[] hash = sha1.ComputeHash(bytes);

// Create a GUID out of the first 16 bytes of the hash (SHA-1 create a 20 byte hash)
int a = (((((hash[3] << 8) + hash[2]) << 8) + hash[1]) << 8) + hash[0];
short b = (short)((hash[5] << 8) + hash[4]);
short c = (short)((hash[7] << 8) + hash[6]);

return s_converted.ToString();
c = (short)((c & 0x0FFF) | 0x5000); // Set high 4 bits of octet 7 to 5, as per RFC 4122
var guid = new Guid(a, b, c, hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15]);
return guid;
}

internal class StartStopKey : IEquatable<StartStopKey>
{
public StartStopKey(Guid provider, TraceEventTask task, Guid activityID) { Provider = provider; this.task = task; ActivityId = activityID; }
public StartStopKey(Guid provider, TraceEventTask task, string activityID) { Provider = provider; this.task = task; ActivityId = activityID; }
public Guid Provider;
public Guid ActivityId;
public string ActivityId;
public TraceEventTask task;

public override int GetHashCode()
Expand Down
25 changes: 1 addition & 24 deletions src/Sarif.Driver/OrderedFileSpecifier.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,19 +96,7 @@ private IEnumerable<IEnumeratedArtifact> EnumeratedArtifacts()
this.cancellationToken.ThrowIfCancellationRequested();

string fullFilePath = Path.Combine(directory, file);

if (!IsTargetWithinFileSizeLimit(file, this.maxFileSizeInKilobytes, FileSystem, out long fileSizeInKb))
{
Skipped ??= new List<IEnumeratedArtifact>();
Skipped.Add(new EnumeratedArtifact(FileSystem)
{
Uri = new Uri(fullFilePath, UriKind.Absolute),
});
}
else
{
sortedFiles.Add(fullFilePath);
}
sortedFiles.Add(fullFilePath);
}

foreach (string file in sortedFiles)
Expand All @@ -121,17 +109,6 @@ private IEnumerable<IEnumeratedArtifact> EnumeratedArtifacts()
}
}

internal static bool IsTargetWithinFileSizeLimit(string path, long maxFileSizeInKB, IFileSystem fileSystem, out long fileSizeInKb)
{
fileSizeInKb = 0;
long size = fileSystem.FileInfoLength(path);
if (size == 0) { return false; };

size = Math.Min(long.MaxValue - 1023, size);
fileSizeInKb = (size + 1023) / 1024;
return fileSizeInKb <= maxFileSizeInKB;
}

private void EnqueueAllDirectories(Queue<string> queue, string directory)
{
this.cancellationToken.ThrowIfCancellationRequested();
Expand Down
5 changes: 5 additions & 0 deletions src/Sarif.Driver/Sdk/AnalyzeOptionsBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,10 @@ public IEnumerable<ResultKind> Kind
"max-file-size-in-kb",
HelpText = "The maximum file size (in kilobytes) that will be analyzed.")]
public long? MaxFileSizeInKilobytes { get; set; }

/// <summary>
/// Gets or sets the value of the "deny-regex" command-line option: a regular
/// expression used to suppress scanning for any file or directory path that
/// matches it.
/// </summary>
/// <remarks>
/// NOTE(review): no default is assigned here, so the property is presumably
/// null when the option is not supplied — confirm how consumers treat null.
/// </remarks>
[Option(
"deny-regex",
HelpText = "A regular expression used to suppress scanning for any file or directory path that matches the regex.")]
public string GlobalFilePathDenyRegex { get; set; }
}
}
29 changes: 0 additions & 29 deletions src/Sarif.Driver/Sdk/DriverEvent.cs

This file was deleted.

30 changes: 30 additions & 0 deletions src/Sarif.Driver/Sdk/DriverEventId.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Microsoft.CodeAnalysis.Sarif.Driver
{
/// <summary>
/// Numeric identifiers for driver events. Every member carries an explicit
/// value and the values run contiguously from 1 to 21.
/// </summary>
/// <remarks>
/// Members that describe a timed operation are declared as adjacent
/// Start/Stop pairs.
/// NOTE(review): presumably these values are the ids used by the driver's
/// ETW event source, in which case existing values must never be renumbered
/// or reused — confirm against the event source definition.
/// </remarks>
public enum DriverEventId : int
{
// Artifact enumeration.
EnumerateArtifactsStart = 1,
EnumerateArtifactsStop = 2,
FirstArtifactQueued = 3,

// Per-artifact reading and scanning.
ReadArtifactStart = 4,
ReadArtifactStop = 5,
ArtifactNotScanned = 6,
ScanArtifactStart = 7,
ScanArtifactStop = 8,

// Per-rule events.
RuleNotCalled = 9,
RuleStart = 10,
RuleStop = 11,
RuleFired = 12,

// Result logging.
LogResultsStart = 13,
LogResultsStop = 14,

// Reserved rule-level events (one standalone, one Start/Stop pair).
RuleReserved0 = 15,
RuleReserved1Start = 16,
RuleReserved1Stop = 17,

// Reserved artifact-level events (one standalone, one Start/Stop pair).
ArtifactReserved0 = 18,
ArtifactReserved1Start = 19,
ArtifactReserved1Stop = 20,

SessionEnded = 21,
}
}
24 changes: 24 additions & 0 deletions src/Sarif.Driver/Sdk/DriverEventNames.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Runtime.CompilerServices;

namespace Microsoft.CodeAnalysis.Sarif.Driver.Sdk
{
    /// <summary>
    /// Compile-time string constants for driver event names and for the
    /// reasons an artifact may be skipped. Except for the Start/Stop names,
    /// each constant's value is identical to its own identifier.
    /// </summary>
    public sealed class DriverEventNames
    {
        /// <summary>Event name: "ArtifactNotScanned".</summary>
        public const string ArtifactNotScanned = nameof(ArtifactNotScanned);

        /// <summary>Event name: "RuleNotCalled".</summary>
        public const string RuleNotCalled = nameof(RuleNotCalled);

        /// <summary>Base name shared by the Start/Stop pair below.</summary>
        public const string ReadArtifact = nameof(ReadArtifact);

        /// <summary>"ReadArtifact/Start": the base name plus a "/Start" suffix.</summary>
        public const string ReadArtifactStart = ReadArtifact + "/Start";

        /// <summary>"ReadArtifact/Stop": the base name plus a "/Stop" suffix.</summary>
        public const string ReadArtifactStop = ReadArtifact + "/Stop";

        // Reasons that an artifact might be skipped entirely.

        /// <summary>Skip reason: "EmptyFile".</summary>
        public const string EmptyFile = nameof(EmptyFile);

        /// <summary>Skip reason: "FileExceedsSizeLimits".</summary>
        public const string FileExceedsSizeLimits = nameof(FileExceedsSizeLimits);

        /// <summary>Skip reason: "FilePathDenied".</summary>
        public const string FilePathDenied = nameof(FilePathDenied);

        /// <summary>Skip reason: "FilePathNotAllowed".</summary>
        public const string FilePathNotAllowed = nameof(FilePathNotAllowed);
    }
}
Loading