Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix GitHubDspIngestionVisitor #2061

Merged
10 commits merged into from
Sep 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Result CalculateBasedlinedResult(DictionaryMergeBehavior propertyBagMerge

SetFirstDetectionTime(result);

resultMatchingProperties = MergeDictionaryPreferFirst(resultMatchingProperties, originalResultMatchingProperties);
resultMatchingProperties = resultMatchingProperties.MergePreferFirst(originalResultMatchingProperties) as Dictionary<string, object>;

if (PreviousResult != null &&
propertyBagMergeBehavior == DictionaryMergeBehavior.InitializeFromOldest)
Expand Down Expand Up @@ -228,22 +228,6 @@ internal void SetFirstDetectionTime(Result targetResult)
targetResult.Provenance.FirstDetectionTimeUtc = firstDetectionTime;
}

private Dictionary<string, object> MergeDictionaryPreferFirst(
Copy link
Author

@ghost ghost Sep 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MergeDictionaryPreferFirst [](start = 43, length = 26)

Moved to ExtensionMethods.cs because the GitHubDspIngestionVisitor now needs it, too. #ByDesign

Dictionary<string, object> firstPropertyBag,
Dictionary<string, object> secondPropertyBag)
{
Dictionary<string, object> mergedPropertyBag = firstPropertyBag;

foreach (string key in secondPropertyBag.Keys)
{
if (!mergedPropertyBag.ContainsKey(key))
{
mergedPropertyBag[key] = secondPropertyBag[key];
}
}
return mergedPropertyBag;
}

public override string ToString()
{
if (PreviousResult != null && CurrentResult != null)
Expand Down
33 changes: 33 additions & 0 deletions src/Sarif/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,39 @@ internal static string PropertyValue(this Dictionary<string, string> properties,
return null;
}

/// <summary>
/// Merge this property bag with another one, preferring the properties in this property
/// bag if there are any duplicates.
/// </summary>
/// <param name="propertyBag">
/// The property bag into which <paramref name="otherPropertyBag"/> is to be merged.
/// </param>
/// <param name="otherPropertyBag">
/// A property bag containing properties to merge into <paramref name="propertyBag"/>.
/// </param>
/// <typeparam name="T">
/// The type of object in the property bag. For SARIF property bags, the type will
/// be <see cref="SerializedPropertyInfo"/>.
/// </typeparam>
/// <returns>
/// The original <paramref name="propertyBag"/> object, into which the properties
/// from <paramref name="otherPropertyBag"/> have now been merged.
/// </returns>
public static IDictionary<string, T> MergePreferFirst<T>(
Copy link
Author

@ghost ghost Sep 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

T [](start = 62, length = 1)

The type parameter is needed because the GitHubDspIngestionVisitor uses this method with SARIF property bags -- so T is SerializedPropertyInfo -- whereas the result matching code where I found this method was using it with a dictionary whose values were of type object. #ByDesign

this IDictionary<string, T> propertyBag,
IDictionary<string, T> otherPropertyBag)
{
foreach (string key in otherPropertyBag.Keys)
{
if (!propertyBag.ContainsKey(key))
{
propertyBag[key] = otherPropertyBag[key];
}
}

return propertyBag;
}

/// <summary>Checks if a character is a newline.</summary>
/// <param name="testedCharacter">The character to check.</param>
/// <returns>true if newline, false if not.</returns>
Expand Down
128 changes: 111 additions & 17 deletions src/Sarif/Visitors/GitHubDspIngestionVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,29 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Collections.Generic;
using System.Linq;

namespace Microsoft.CodeAnalysis.Sarif.Visitors
{
{
public class GitHubDspIngestionVisitor : SarifRewritingVisitor
{
// DSP requires that every related location have a message. It's not clear why this
// requirement exists, as this data is mostly used to build embedded links from
// results (where the link anchor text actually resides).
private const string PlaceholderRelatedLocationMessage = "[No message provided.]";

// GitHub DSP reportedly has an ingestion limit of 500 issues.
// Internal static rather than private const to allow a unit test with a practical limit.
internal static int s_MaxResults = 500;

private IList<Artifact> artifacts;
private IList<ThreadFlowLocation> threadFlowLocations;

public override Run VisitRun(Run node)
{
this.artifacts = node.Artifacts;

this.threadFlowLocations = node.ThreadFlowLocations;

// DSP does not support submitting invocation objects. Invocations
// contains potentially sensitive environment details, such as
// account names embedded in paths. Invocations also store
Expand All @@ -31,7 +43,7 @@ public override Run VisitRun(Run node)
errorsCount++;
}
}

if (errorsCount != node.Results.Count)
{
var errors = new List<Result>();
Expand All @@ -43,32 +55,103 @@ public override Run VisitRun(Run node)
errors.Add(result);
}

// GitHub DSP reportedly has an ingestion limit of 500 issues
if (errors.Count > 500) { break; }
if (errors.Count == s_MaxResults) { break; }
}

node.Results = errors;
}

if (node.Results.Count > s_MaxResults)
{
node.Results = node.Results.Take(s_MaxResults).ToList();
}
}

node = base.VisitRun(node);

// DSP prefers a relative path local to the result. We clear
// the artifacts table, as all artifact information is now
// inlined with each result.
node.Artifacts = null;
node.Artifacts = null;

// DSP requires threadFlowLocations to be inlined in the result,
// not referenced from run.threadFlowLocations.
node.ThreadFlowLocations = null;

return node;
}

public override ThreadFlowLocation VisitThreadFlowLocation(ThreadFlowLocation node)
{
if (this.threadFlowLocations != null && node.Index > -1)
{
ThreadFlowLocation sharedLocation = this.threadFlowLocations[node.Index];

// Location.Message might vary per usage. For example, on one usage,
// we might be tracking an uninitialized variable, and the location
// might have the message "Uninitialized variable 'ptr' passed to 'f'".
// Another usage of the same location might have a different message,
// or none at all. So even though we are getting the location from
// the shared object, don't take its message unless the current object
// does not have one.
Message message = node.Location?.Message;

node.Location = sharedLocation.Location;

if (message != null)
{
// Make sure there's a place to put the message. threadFlowLocation.Location
// is not required, so the shared object might not have had one.
if (node.Location == null)
{
node.Location = new Location();
}

node.Location.Message = message;
}

// Copy other properties that should be the same for each usage of the
// shared location.
node.Kinds = sharedLocation.Kinds;
node.Module = sharedLocation.Module;

// Merge properties from the shared location, preferring the properties from
// this location if there are any duplicates.
MergeProperties(node, sharedLocation);

// Do NOT copy properties that can be different for each use of the shared
// threadFlowLocation:
// - ExecutionOrder
// - ExecutionTimeUtc
// - Importance
// - NestingLevel
// - Stack
// - State
// - Taxa
// - WebRequest
// - WebResponse

node.Index = -1;
}

return base.VisitThreadFlowLocation(node);
}

public override ArtifactLocation VisitArtifactLocation(ArtifactLocation node)
{
if (this.artifacts != null && node.Index > -1)
{
node.Uri = this.artifacts[node.Index].Location.Uri;
node.UriBaseId = this.artifacts[node.Index].Location.UriBaseId;
ArtifactLocation sharedLocation = this.artifacts[node.Index].Location;
node.Uri = sharedLocation.Uri;
node.UriBaseId = sharedLocation.UriBaseId;

// Merge properties from the shared location, preferring the properties from
// this location if there are any duplicates.
MergeProperties(node, sharedLocation);

node.Index = -1;
}

return base.VisitArtifactLocation(node);
}

Expand All @@ -78,15 +161,11 @@ public override Result VisitResult(Result node)
{
foreach (Location relatedLocation in node.RelatedLocations)
{
// DSP requires that every related location have a message. It's
// not clear why this requirement exists, as this data is mostly
// used to build embedded links from results (where the link
// anchor text actually resides).
if (string.IsNullOrEmpty(relatedLocation.Message?.Text))
{
relatedLocation.Message = new Message
{
Text = "[No message provided.]"
Text = PlaceholderRelatedLocationMessage
Copy link
Contributor

@rtaket rtaket Sep 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Text [](start = 28, length = 4)

I thought that Messages could have either a Text or Id property to define the string. Here, if the message contains an Id, we're adding a text string. Is that OK? #ByDesign

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... I should have added, I realize the previous code did this too, but I thought I'd ask anyway.


In reply to: 483750179 [](ancestors = 483750179)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good question. We're compensating here for a deficiency in the DSP -- the DSP looks for the Text property.


In reply to: 483750179 [](ancestors = 483750179)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... when in fact it shouldn't care about the presence of related location messages at all.


In reply to: 483751157 [](ancestors = 483751157,483750179)

};
}
}
Expand All @@ -98,17 +177,32 @@ public override Result VisitResult(Result node)
// partial fingerprints property in order to prefer these
// values for matching (over DSP's built-in SARIF-driven
// results matching heuristics).
foreach(string fingerprintKey in node.Fingerprints.Keys)
foreach (string fingerprintKey in node.Fingerprints.Keys)
{
node.PartialFingerprints ??= new Dictionary<string, string>();
node.PartialFingerprints[fingerprintKey] = node.Fingerprints[fingerprintKey];
}
node.Fingerprints = null;
}

node.CodeFlows = null;

return base.VisitResult(node);
}

// Merge properties from a source to a target, preferring the existing properties
// on the target if there are any duplicates.
private static void MergeProperties(PropertyBagHolder target, PropertyBagHolder source)
{
// Are there any properties to merge?
if (source.Properties != null)
{
// If so, make sure there's someplace to put them.
if (target.Properties == null)
{
target.Properties = new Dictionary<string, SerializedPropertyInfo>();
}

target.Properties = target.Properties.MergePreferFirst(source.Properties);
}
}
}
}
28 changes: 28 additions & 0 deletions src/Test.UnitTests.Sarif/Test.UnitTests.Sarif.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@
<None Remove="TestData\Baseline\elfie-arriba.sarif" />
<None Remove="TestData\Baseline\SuppressionTestCurrent.sarif" />
<None Remove="TestData\Baseline\SuppressionTestPrevious.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\Fingerprints.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\NonErrorResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\RelatedLocationMessage.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\ThreadFlowLocations.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\TooManyResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithArtifacts.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithInvocation.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\Fingerprints.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\NonErrorResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\ThreadFlowLocations.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\TooManyResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\WithArtifacts.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\WithInvocation.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\RelatedLocationMessage.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Absolute_TextFiles.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_All.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_Guids.sarif" />
Expand Down Expand Up @@ -74,6 +88,20 @@
</ItemGroup>

<ItemGroup>
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\Fingerprints.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\NonErrorResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\ThreadFlowLocations.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithArtifacts.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\RelatedLocationMessage.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithInvocation.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\TooManyResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\NonErrorResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\Fingerprints.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\ThreadFlowLocations.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\WithArtifacts.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\WithInvocation.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\TooManyResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\RelatedLocationMessage.sarif" />
<EmbeddedResource Include="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_VersionControlInformation.sarif" />
<EmbeddedResource Include="TestData\Readers\elfie-arriba-utf8-bom.sarif" />
<EmbeddedResource Include="TestData\Baseline\elfie-arriba.sarif" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "Sarif.UnitTests"
}
},
"results": [
{
"ruleId": "TEST1001",
"level": "error",
"message": {
"text": "The message."
},
"partialFingerprints": {
"f1": "f1-value",
"f2": "f2-value"
}
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "Sarif.UnitTests"
}
},
"results": [
{
"ruleId": "TEST1002",
"level": "error",
"message": {
"text": "The message."
}
},
{
"ruleId": "TEST1003",
"level": "error",
"message": {
"text": "The message."
}
}
]
}
]
}
Loading