Skip to content

Commit

Permalink
Fix GitHubDspIngestionVisitor (#2061)
Browse files Browse the repository at this point in the history
Add file-based unit tests for the `GitHubDspIngestionVisitor` and fix the bugs that they reveal:

- #2058: GitHubDspIngestionVisitor limits number of results only if some of them are non-errors
- #2060: GitHubDspIngestionVisitor removes codeFlows entirely rather than inlining threadFlowLocations

When inlining `threadFlowLocations`, we have to take care to distinguish those properties that are shared among all usages of a given location (for example, the `location` and `module` properties) from those properties that vary across usages (for example, `executionTimeUtc` and `state`). The `location.message` requires special handling because we do want to take `location` from the shared object, but we don't necessarily want to take `location.message`. Finally, we merge the property bags, preferring the values in the per-usage property bag if there are any overlaps.

We also have to merge property bags when inlining artifact locations.
  • Loading branch information
Larry Golding committed Sep 4, 2020
1 parent ffe27f2 commit e96e36c
Show file tree
Hide file tree
Showing 19 changed files with 926 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Result CalculateBasedlinedResult(DictionaryMergeBehavior propertyBagMerge

SetFirstDetectionTime(result);

resultMatchingProperties = MergeDictionaryPreferFirst(resultMatchingProperties, originalResultMatchingProperties);
resultMatchingProperties = resultMatchingProperties.MergePreferFirst(originalResultMatchingProperties) as Dictionary<string, object>;

if (PreviousResult != null &&
propertyBagMergeBehavior == DictionaryMergeBehavior.InitializeFromOldest)
Expand Down Expand Up @@ -228,22 +228,6 @@ internal void SetFirstDetectionTime(Result targetResult)
targetResult.Provenance.FirstDetectionTimeUtc = firstDetectionTime;
}

private Dictionary<string, object> MergeDictionaryPreferFirst(
Dictionary<string, object> firstPropertyBag,
Dictionary<string, object> secondPropertyBag)
{
Dictionary<string, object> mergedPropertyBag = firstPropertyBag;

foreach (string key in secondPropertyBag.Keys)
{
if (!mergedPropertyBag.ContainsKey(key))
{
mergedPropertyBag[key] = secondPropertyBag[key];
}
}
return mergedPropertyBag;
}

public override string ToString()
{
if (PreviousResult != null && CurrentResult != null)
Expand Down
33 changes: 33 additions & 0 deletions src/Sarif/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,39 @@ internal static string PropertyValue(this Dictionary<string, string> properties,
return null;
}

/// <summary>
/// Merge this property bag with another one, preferring the properties in this property
/// bag if there are any duplicates.
/// </summary>
/// <param name="propertyBag">
/// The property bag into which <paramref name="otherPropertyBag"/> is to be merged.
/// </param>
/// <param name="otherPropertyBag">
/// A property bag containing properties to merge into <paramref name="propertyBag"/>.
/// </param>
/// <typeparam name="T">
/// The type of object in the property bag. For SARIF property bags, the type will
/// be <see cref="SerializedPropertyInfo"/>.
/// </typeparam>
/// <returns>
/// The original <paramref name="propertyBag"/> object, into which the properties
/// from <paramref name="otherPropertyBag"/> have now been merged.
/// </returns>
public static IDictionary<string, T> MergePreferFirst<T>(
this IDictionary<string, T> propertyBag,
IDictionary<string, T> otherPropertyBag)
{
foreach (string key in otherPropertyBag.Keys)
{
if (!propertyBag.ContainsKey(key))
{
propertyBag[key] = otherPropertyBag[key];
}
}

return propertyBag;
}

/// <summary>Checks if a character is a newline.</summary>
/// <param name="testedCharacter">The character to check.</param>
/// <returns>true if newline, false if not.</returns>
Expand Down
128 changes: 111 additions & 17 deletions src/Sarif/Visitors/GitHubDspIngestionVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,29 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Collections.Generic;
using System.Linq;

namespace Microsoft.CodeAnalysis.Sarif.Visitors
{
{
public class GitHubDspIngestionVisitor : SarifRewritingVisitor
{
// DSP requires that every related location have a message. It's not clear why this
// requirement exists, as this data is mostly used to build embedded links from
// results (where the link anchor text actually resides).
private const string PlaceholderRelatedLocationMessage = "[No message provided.]";

// GitHub DSP reportedly has an ingestion limit of 500 issues.
// Internal static rather than private const to allow a unit test with a practical limit.
internal static int s_MaxResults = 500;

private IList<Artifact> artifacts;
private IList<ThreadFlowLocation> threadFlowLocations;

public override Run VisitRun(Run node)
{
this.artifacts = node.Artifacts;

this.threadFlowLocations = node.ThreadFlowLocations;

// DSP does not support submitting invocation objects. Invocations
// contains potentially sensitive environment details, such as
// account names embedded in paths. Invocations also store
Expand All @@ -31,7 +43,7 @@ public override Run VisitRun(Run node)
errorsCount++;
}
}

if (errorsCount != node.Results.Count)
{
var errors = new List<Result>();
Expand All @@ -43,32 +55,103 @@ public override Run VisitRun(Run node)
errors.Add(result);
}

// GitHub DSP reportedly has an ingestion limit of 500 issues
if (errors.Count > 500) { break; }
if (errors.Count == s_MaxResults) { break; }
}

node.Results = errors;
}

if (node.Results.Count > s_MaxResults)
{
node.Results = node.Results.Take(s_MaxResults).ToList();
}
}

node = base.VisitRun(node);

// DSP prefers a relative path local to the result. We clear
// the artifacts table, as all artifact information is now
// inlined with each result.
node.Artifacts = null;
node.Artifacts = null;

// DSP requires threadFlowLocations to be inlined in the result,
// not referenced from run.threadFlowLocations.
node.ThreadFlowLocations = null;

return node;
}

public override ThreadFlowLocation VisitThreadFlowLocation(ThreadFlowLocation node)
{
if (this.threadFlowLocations != null && node.Index > -1)
{
ThreadFlowLocation sharedLocation = this.threadFlowLocations[node.Index];

// Location.Message might vary per usage. For example, on one usage,
// we might be tracking an uninitialized variable, and the location
// might have the message "Uninitialized variable 'ptr' passed to 'f'".
// Another usage of the same location might have a different message,
// or none at all. So even though we are getting the location from
// the shared object, don't take its message unless the current object
// does not have one.
Message message = node.Location?.Message;

node.Location = sharedLocation.Location;

if (message != null)
{
// Make sure there's a place to put the message. threadFlowLocation.Location
// is not required, so the shared object might not have had one.
if (node.Location == null)
{
node.Location = new Location();
}

node.Location.Message = message;
}

// Copy other properties that should be the same for each usage of the
// shared location.
node.Kinds = sharedLocation.Kinds;
node.Module = sharedLocation.Module;

// Merge properties from the shared location, preferring the properties from
// this location if there are any duplicates.
MergeProperties(node, sharedLocation);

// Do NOT copy properties that can be different for each use of the shared
// threadFlowLocation:
// - ExecutionOrder
// - ExecutionTimeUtc
// - Importance
// - NestingLevel
// - Stack
// - State
// - Taxa
// - WebRequest
// - WebResponse

node.Index = -1;
}

return base.VisitThreadFlowLocation(node);
}

public override ArtifactLocation VisitArtifactLocation(ArtifactLocation node)
{
if (this.artifacts != null && node.Index > -1)
{
node.Uri = this.artifacts[node.Index].Location.Uri;
node.UriBaseId = this.artifacts[node.Index].Location.UriBaseId;
ArtifactLocation sharedLocation = this.artifacts[node.Index].Location;
node.Uri = sharedLocation.Uri;
node.UriBaseId = sharedLocation.UriBaseId;

// Merge properties from the shared location, preferring the properties from
// this location if there are any duplicates.
MergeProperties(node, sharedLocation);

node.Index = -1;
}

return base.VisitArtifactLocation(node);
}

Expand All @@ -78,15 +161,11 @@ public override Result VisitResult(Result node)
{
foreach (Location relatedLocation in node.RelatedLocations)
{
// DSP requires that every related location have a message. It's
// not clear why this requirement exists, as this data is mostly
// used to build embedded links from results (where the link
// anchor text actually resides).
if (string.IsNullOrEmpty(relatedLocation.Message?.Text))
{
relatedLocation.Message = new Message
{
Text = "[No message provided.]"
Text = PlaceholderRelatedLocationMessage
};
}
}
Expand All @@ -98,17 +177,32 @@ public override Result VisitResult(Result node)
// partial fingerprints property in order to prefer these
// values for matching (over DSP's built-in SARIF-driven
// results matching heuristics).
foreach(string fingerprintKey in node.Fingerprints.Keys)
foreach (string fingerprintKey in node.Fingerprints.Keys)
{
node.PartialFingerprints ??= new Dictionary<string, string>();
node.PartialFingerprints[fingerprintKey] = node.Fingerprints[fingerprintKey];
}
node.Fingerprints = null;
}

node.CodeFlows = null;

return base.VisitResult(node);
}

// Merge properties from a source to a target, preferring the existing properties
// on the target if there are any duplicates.
private static void MergeProperties(PropertyBagHolder target, PropertyBagHolder source)
{
// Are there any properties to merge?
if (source.Properties != null)
{
// If so, make sure there's someplace to put them.
if (target.Properties == null)
{
target.Properties = new Dictionary<string, SerializedPropertyInfo>();
}

target.Properties = target.Properties.MergePreferFirst(source.Properties);
}
}
}
}
28 changes: 28 additions & 0 deletions src/Test.UnitTests.Sarif/Test.UnitTests.Sarif.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@
<None Remove="TestData\Baseline\elfie-arriba.sarif" />
<None Remove="TestData\Baseline\SuppressionTestCurrent.sarif" />
<None Remove="TestData\Baseline\SuppressionTestPrevious.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\Fingerprints.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\NonErrorResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\RelatedLocationMessage.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\ThreadFlowLocations.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\TooManyResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithArtifacts.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithInvocation.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\Fingerprints.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\NonErrorResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\ThreadFlowLocations.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\TooManyResults.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\WithArtifacts.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\Inputs\WithInvocation.sarif" />
<None Remove="TestData\GitHubDspIngestionVisitor\RelatedLocationMessage.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Absolute_TextFiles.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_All.sarif" />
<None Remove="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_Guids.sarif" />
Expand Down Expand Up @@ -74,6 +88,20 @@
</ItemGroup>

<ItemGroup>
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\Fingerprints.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\NonErrorResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\ThreadFlowLocations.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithArtifacts.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\RelatedLocationMessage.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\WithInvocation.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\ExpectedOutputs\TooManyResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\NonErrorResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\Fingerprints.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\ThreadFlowLocations.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\WithArtifacts.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\WithInvocation.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\TooManyResults.sarif" />
<EmbeddedResource Include="TestData\GitHubDspIngestionVisitor\Inputs\RelatedLocationMessage.sarif" />
<EmbeddedResource Include="TestData\InsertOptionalDataVisitor\ExpectedOutputs\CoreTests-Relative_VersionControlInformation.sarif" />
<EmbeddedResource Include="TestData\Readers\elfie-arriba-utf8-bom.sarif" />
<EmbeddedResource Include="TestData\Baseline\elfie-arriba.sarif" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "Sarif.UnitTests"
}
},
"results": [
{
"ruleId": "TEST1001",
"level": "error",
"message": {
"text": "The message."
},
"partialFingerprints": {
"f1": "f1-value",
"f2": "f2-value"
}
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "Sarif.UnitTests"
}
},
"results": [
{
"ruleId": "TEST1002",
"level": "error",
"message": {
"text": "The message."
}
},
{
"ruleId": "TEST1003",
"level": "error",
"message": {
"text": "The message."
}
}
]
}
]
}
Loading

0 comments on commit e96e36c

Please sign in to comment.