Skip to content

Commit

Permalink
Improve Gremlin schema sync performance
Browse files Browse the repository at this point in the history
  • Loading branch information
kmcginnes committed Jul 16, 2024
1 parent b689f93 commit 66e5b57
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 36 deletions.
3 changes: 3 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

- Search in openCypher will now execute a single request when searching across
all labels (<https://github.com/aws/graph-explorer/pull/493>)
- Gremlin schema sync will be much faster on larger databases, thanks to
@dsaban-lightricks for his great suggestion in issue #225
(<https://github.com/aws/graph-explorer/pull/498>)

**Bug Fixes and Minor Changes**

Expand Down
46 changes: 29 additions & 17 deletions packages/graph-explorer/src/connector/gremlin/gremlinExplorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { fetchDatabaseRequest } from "../fetchDatabaseRequest";
import { GraphSummary } from "./types";
import { v4 } from "uuid";
import { Explorer } from "../useGEFetchTypes";
import { env } from "../../utils";
import { logger } from "../../utils";

function _gremlinFetch(connection: ConnectionConfig, options: any) {
return async (queryTemplate: string) => {
Expand All @@ -30,41 +30,53 @@ function _gremlinFetch(connection: ConnectionConfig, options: any) {
};
}

/**
 * Requests the detailed graph summary statistics for a connection.
 *
 * Sends a request to `<connection.url>/pg/statistics/summary?mode=detailed`
 * (GET unless `options` overrides the method) and returns the `graphSummary`
 * field of the response payload. Any failure while requesting or reading the
 * response is logged via `logger.error` and the function resolves to
 * `undefined` instead of throwing.
 */
async function fetchSummary(
  connection: ConnectionConfig,
  options: RequestInit
) {
  try {
    const summaryUrl = `${connection.url}/pg/statistics/summary?mode=detailed`;
    // Caller options are spread last, so they take precedence over the default method.
    const requestInit = { method: "GET", ...options };
    const { payload } = await fetchDatabaseRequest(
      connection,
      summaryUrl,
      requestInit
    );
    return payload.graphSummary as GraphSummary;
  } catch (error) {
    logger.error(
      "[Gremlin Explorer] Failed to gather summary statistics",
      error
    );
    return undefined;
  }
}

export function createGremlinExplorer(connection: ConnectionConfig): Explorer {
return {
connection: connection,
/**
 * Fetches the schema for this connection.
 *
 * Summary statistics are requested first through `fetchSummary`, which
 * logs and resolves to `undefined` on failure. The (possibly missing)
 * summary is then handed to the Gremlin `fetchSchema` query helper together
 * with a fetch function bound to this connection and the request options.
 */
async fetchSchema(options) {
  logger.log("[Gremlin Explorer] Fetching schema...");
  // Single summary request; the previous inline try/catch duplicated this
  // call and redeclared `summary`.
  const summary = await fetchSummary(connection, options);
  return fetchSchema(_gremlinFetch(connection, options), summary);
},
/**
 * Logs the operation, then delegates to `fetchVertexTypeCounts` using a
 * Gremlin fetch function built from this connection and the given options.
 */
async fetchVertexCountsByType(req, options) {
logger.log("[Gremlin Explorer] Fetching vertex counts by type...");
return fetchVertexTypeCounts(_gremlinFetch(connection, options), req);
},
/**
 * Logs the operation, then delegates to the `fetchNeighbors` query helper
 * using a Gremlin fetch function built from this connection and the given
 * options.
 */
async fetchNeighbors(req, options) {
logger.log("[Gremlin Explorer] Fetching neighbors...");
return fetchNeighbors(_gremlinFetch(connection, options), req);
},
/**
 * Logs the operation, then delegates to the `fetchNeighborsCount` query
 * helper using a Gremlin fetch function built from this connection and the
 * given options.
 */
async fetchNeighborsCount(req, options) {
logger.log("[Gremlin Explorer] Fetching neighbors count...");
return fetchNeighborsCount(_gremlinFetch(connection, options), req);
},
/**
 * Runs a keyword search through the `keywordSearch` query helper.
 *
 * A fresh UUID (v4) is assigned to `options.queryId` before the request is
 * made. When the caller passes an options object, that object is mutated.
 * NOTE(review): presumably the query id is used to identify/cancel the
 * running query - confirm against `_gremlinFetch`.
 */
async keywordSearch(req, options) {
// Default to an empty options object so the query id can always be attached.
options ??= {};
options.queryId = v4();

logger.log("[Gremlin Explorer] Fetching keyword search...");
return keywordSearch(_gremlinFetch(connection, options), req);
},
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { sanitizeText } from "../../../utils";
import { logger, sanitizeText } from "../../../utils";
import type { SchemaResponse } from "../../useGEFetchTypes";
import edgeLabelsTemplate from "../templates/edgeLabelsTemplate";
import edgesSchemaTemplate from "../templates/edgesSchemaTemplate";
Expand Down Expand Up @@ -87,6 +87,7 @@ const fetchVertexLabels = async (
gremlinFetch: GremlinFetch
): Promise<Record<string, number>> => {
const labelsTemplate = vertexLabelsTemplate();
logger.log("[Gremlin Explorer] Fetching vertex labels with counts...");
const data = await gremlinFetch<RawVertexLabelsResponse>(labelsTemplate);

const values = data.result.data["@value"][0]["@value"];
Expand Down Expand Up @@ -121,6 +122,7 @@ const fetchVerticesAttributes = async (
types: labels,
});

logger.log("[Gremlin Explorer] Fetching vertices attributes...");
const response =
await gremlinFetch<RawVerticesSchemaResponse>(verticesTemplate);
const verticesSchemas = response.result.data["@value"][0]["@value"];
Expand Down Expand Up @@ -163,6 +165,7 @@ const fetchEdgeLabels = async (
gremlinFetch: GremlinFetch
): Promise<Record<string, number>> => {
const labelsTemplate = edgeLabelsTemplate();
logger.log("[Gremlin Explorer] Fetching edge labels with counts...");
const data = await gremlinFetch<RawEdgeLabelsResponse>(labelsTemplate);

const values = data.result.data["@value"][0]["@value"];
Expand All @@ -187,6 +190,7 @@ const fetchEdgesAttributes = async (
const edgesTemplate = edgesSchemaTemplate({
types: labels,
});
logger.log("[Gremlin Explorer] Fetching edges attributes...");
const data = await gremlinFetch<RawEdgesSchemaResponse>(edgesTemplate);

const edgesSchemas = data.result.data["@value"][0]["@value"];
Expand Down Expand Up @@ -238,6 +242,8 @@ const fetchSchema = async (
summary?: GraphSummary
): Promise<SchemaResponse> => {
if (!summary) {
logger.log("[Gremlin Explorer] No summary statistics");

const vertices = await fetchVerticesSchema(gremlinFetch);
const totalVertices = vertices.reduce((total, vertex) => {
return total + (vertex.total ?? 0);
Expand All @@ -256,6 +262,8 @@ const fetchSchema = async (
};
}

logger.log("[Gremlin Explorer] Using summary statistics");

const vertices = await fetchVerticesAttributes(
gremlinFetch,
summary.nodeLabels,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import edgesSchemaTemplate from "./edgesSchemaTemplate";
import { normalizeWithNoSpace as normalize } from "../../../utils/testing";

describe("Gremlin > edgesSchemaTemplate", () => {
  it("Should return a template with the projection of each type", () => {
    const template = edgesSchemaTemplate({ types: ["route", "contain"] });

    // Both sides go through `normalize` so the assertion does not depend on
    // the template's line breaks or indentation (presumably it strips
    // whitespace - see utils/testing).
    expect(normalize(template)).toBe(
      normalize(`
        g.E()
          .project("route", "contain")
          .by(V().bothE("route").limit(1))
          .by(V().bothE("contain").limit(1))
          .limit(1)
      `)
    );
  });
});
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dedent from "dedent";
import { uniq } from "lodash";

/**
Expand All @@ -14,7 +15,16 @@ import { uniq } from "lodash";
* .limit(1)
*/
export default function edgesSchemaTemplate({ types }: { types: string[] }) {
  // Split multi-label types on "::", drop duplicates, and wrap every label in
  // double quotes exactly once so it can be interpolated as a string literal.
  const labels = uniq(types.flatMap(type => type.split("::"))).map(
    label => `"${label}"`
  );

  // One `.by(...)` projection per label. Continuation lines are joined with
  // the same indentation as the template line so the output stays aligned.
  const projections = labels
    .map(label => `.by(V().bothE(${label}).limit(1))`)
    .join("\n      ");

  return dedent`
    g.E()
      .project(${labels.join(", ")})
      ${projections}
      .limit(1)
  `;
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ describe("Gremlin > verticesSchemaTemplate", () => {

expect(normalize(template)).toBe(
normalize(`
g.V().project("airport","country")
.by(V().hasLabel("airport").limit(1))
.by(V().hasLabel("country").limit(1))
.limit(1)
g.V().union(
__.hasLabel("airport").limit(1),
__.hasLabel("country").limit(1)
)
.fold()
.project("airport", "country")
.by(unfold().hasLabel("airport"))
.by(unfold().hasLabel("country"))
`)
);
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,40 @@
import dedent from "dedent";
import { uniq } from "lodash";

/**
 * Given a set of nodes labels, it returns a Gremlin template that contains
 * one sample of each node label.
 *
 * Types made of several labels joined by "::" are split into their individual
 * labels, and duplicate labels are removed.
 *
 * @example
 * types = ["airport", "country"]
 *
 * g.V()
 *   .union(
 *     __.hasLabel("airport").limit(1),
 *     __.hasLabel("country").limit(1)
 *   )
 *   .fold()
 *   .project(
 *     "airport",
 *     "country"
 *   )
 *   .by(unfold().hasLabel("airport"))
 *   .by(unfold().hasLabel("country"))
 *
 * @param types vertex types to sample
 * @returns the Gremlin query text
 */
export default function verticesSchemaTemplate({ types }: { types: string[] }) {
  // Labels with quotes (quoted exactly once, here)
  const labels = uniq(types.flatMap(type => type.split("::"))).map(
    label => `"${label}"`
  );

  // Joined fragments carry the indentation of the template line they are
  // interpolated into, so the output stays aligned.
  const samples = labels
    .map(label => `__.hasLabel(${label}).limit(1)`)
    .join(",\n        ");
  const projectionKeys = labels.join(",\n        ");
  const projections = labels
    .map(label => `.by(unfold().hasLabel(${label}))`)
    .join("\n      ");

  return dedent`
    g.V()
      .union(
        ${samples}
      )
      .fold()
      .project(
        ${projectionKeys}
      )
      ${projections}
  `;
}
1 change: 1 addition & 0 deletions packages/graph-explorer/src/utils/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ export { default as useClickOutside } from "./useClickOutside";
export { default as sanitizeText } from "./sanitizeText";
export { DEFAULT_SERVICE_TYPE } from "./constants";
export { default as escapeString } from "./escapeString";
export { default as logger } from "./logger";
export * from "./set";
export * from "./env";
29 changes: 29 additions & 0 deletions packages/graph-explorer/src/utils/logger.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* eslint-disable no-console */

import { env } from "./env";

/*
# DEV NOTE
This is a simple logging utility that will allow `console.log` calls any time
`env.DEV === true`. This will be useful for local development and debugging.
I can imagine a future where this logger has some additional functionality where
it can send errors to the server and maybe allow the user to enable debug logging
at runtime.
*/

/**
 * Minimal logging facade over `console`.
 *
 * `log` is only emitted when `env.DEV` is truthy; `warn` and `error` are
 * always emitted. Every method forwards its arguments to the matching
 * `console` method unchanged, with the extra parameters spread as individual
 * arguments rather than wrapped in an array.
 */
const logger = {
  /** Calls `console.log` if the app is running in DEV mode. */
  log(message?: unknown, ...optionalParams: unknown[]) {
    if (env.DEV) {
      console.log(message, ...optionalParams);
    }
  },
  /** Calls `console.warn`. */
  warn(message?: unknown, ...optionalParams: unknown[]) {
    console.warn(message, ...optionalParams);
  },
  /** Calls `console.error`. */
  error(message?: unknown, ...optionalParams: unknown[]) {
    console.error(message, ...optionalParams);
  },
};

export default logger;

0 comments on commit 66e5b57

Please sign in to comment.