Skip to content

[feature branch] Search Content API #786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions packages/chatbot-server-mongodb-public/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ import {
import { redactConnectionUri } from "./middleware/redactConnectionUri";
import path from "path";
import express from "express";
import { logger } from "mongodb-rag-core";
import { logger, makeMongoDbSearchResultsStore } from "mongodb-rag-core";
import { createAzure } from "mongodb-rag-core/aiSdk";
import {
wrapOpenAI,
wrapTraced,
Expand All @@ -40,7 +41,6 @@ import {
import { AzureOpenAI } from "mongodb-rag-core/openai";
import { MongoClient } from "mongodb-rag-core/mongodb";
import {
ANALYZER_ENV_VARS,
AZURE_OPENAI_ENV_VARS,
PREPROCESSOR_ENV_VARS,
TRACING_ENV_VARS,
Expand All @@ -57,7 +57,7 @@ import { makeGenerateResponseWithSearchTool } from "./processors/generateRespons
import { makeBraintrustLogger } from "mongodb-rag-core/braintrust";
import { makeMongoDbScrubbedMessageStore } from "./tracing/scrubbedMessages/MongoDbScrubbedMessageStore";
import { MessageAnalysis } from "./tracing/scrubbedMessages/analyzeMessage";
import { createAzure } from "mongodb-rag-core/aiSdk";
import { makeFindContentWithMongoDbMetadata } from "./processors/findContentWithMongoDbMetadata";

export const {
MONGODB_CONNECTION_URI,
Expand Down Expand Up @@ -120,6 +120,11 @@ export const embeddedContentStore = makeMongoDbEmbeddedContentStore({
},
});

// MongoDB-backed store for search results. It is handed to the content router
// through `config.contentRouterConfig.searchResultsStore`.
export const searchResultsStore = makeMongoDbSearchResultsStore({
connectionUri: MONGODB_CONNECTION_URI,
databaseName: MONGODB_DATABASE_NAME,
});

export const verifiedAnswerConfig = {
embeddingModel: OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT,
findNearestNeighborsOptions: {
Expand Down Expand Up @@ -307,6 +312,13 @@ export async function closeDbConnections() {
logger.info(`Segment logging is ${segmentConfig ? "enabled" : "disabled"}`);

export const config: AppConfig = {
contentRouterConfig: {
findContent: makeFindContentWithMongoDbMetadata({
findContent,
classifierModel: languageModel,
}),
searchResultsStore,
},
conversationsRouterConfig: {
middleware: [
blockGetRequests,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Module mocks: each module keeps its real exports, and only the function this
// suite needs to control is swapped for a Jest mock.
jest.mock("mongodb-rag-core/mongoDbMetadata", () => ({
  ...jest.requireActual("mongodb-rag-core/mongoDbMetadata"),
  classifyMongoDbProgrammingLanguageAndProduct: jest.fn(),
}));

jest.mock("mongodb-rag-core", () => ({
  ...jest.requireActual("mongodb-rag-core"),
  updateFrontMatter: jest.fn(),
}));

import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core";
import {
makeFindContentWithMongoDbMetadata,
} from "./findContentWithMongoDbMetadata";
import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata";


// Handles to the mocked module functions, cast to `jest.Mock` so tests can
// configure return values and inspect calls.
const mockedClassify =
classifyMongoDbProgrammingLanguageAndProduct as jest.Mock;
const mockedUpdateFrontMatter = updateFrontMatter as jest.Mock;

/**
  Builds a Jest mock standing in for a `FindContentFunc`. Every call resolves
  to the given `result`.
 */
function makeMockFindContent(result: string[]): FindContentFunc {
  const findContentStub = jest.fn();
  findContentStub.mockResolvedValue(result);
  return findContentStub;
}

// Reset all mocks after each test so stubbed return values and recorded calls
// from one test do not carry over into the next.
afterEach(() => {
jest.resetAllMocks();
});

describe("makeFindContentWithMongoDbMetadata", () => {
  // The wrapper should classify the query, pass the classification to
  // `updateFrontMatter`, and forward the rewritten query (with the original
  // filters and limit) to the wrapped findContent.
  test("enhances query with front matter and classification", async () => {
    const userQuery = "How do I use MongoDB with TypeScript?";
    const queryWithFrontMatter = `---
product: driver
programmingLanguage: typescript
---
How do I use MongoDB with TypeScript?`;
    const stubbedResult = ["doc1", "doc2"];

    // Arrange: stub the inner search first, then the two mocked helpers.
    const innerFindContent = makeMockFindContent(stubbedResult);
    mockedUpdateFrontMatter.mockReturnValue(queryWithFrontMatter);
    mockedClassify.mockResolvedValue({
      product: "driver",
      programmingLanguage: "typescript",
    });

    const findContentWithMetadata = makeFindContentWithMongoDbMetadata({
      findContent: innerFindContent,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      classifierModel: {} as any,
    });

    // Act
    const actualResult = await findContentWithMetadata({
      query: userQuery,
      filters: { sourceName: ["docs"] },
      limit: 3,
    });

    // Assert: the classifier and front matter helper see the raw query.
    expect(mockedClassify).toHaveBeenCalledWith(expect.anything(), userQuery);
    expect(mockedUpdateFrontMatter).toHaveBeenCalledWith(userQuery, {
      product: "driver",
      programmingLanguage: "typescript",
    });

    // Assert: the inner search sees the rewritten query, and its result is
    // returned untouched.
    expect(innerFindContent).toHaveBeenCalledWith({
      query: queryWithFrontMatter,
      filters: { sourceName: ["docs"] },
      limit: 3,
    });
    expect(actualResult).toEqual(stubbedResult);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core";
import { LanguageModel } from "mongodb-rag-core/aiSdk";
import { wrapTraced } from "mongodb-rag-core/braintrust";
import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata";

/**
  Wraps a `FindContentFunc` so that each query is first run through
  `classifyMongoDbProgrammingLanguageAndProduct`. Any `product` or
  `programmingLanguage` the classifier returns is passed to
  `updateFrontMatter` together with the query, and the resulting query is sent
  to the wrapped `findContent`. `filters` and `limit` are forwarded unchanged.

  The returned function is wrapped with `wrapTraced`.
 */
export const makeFindContentWithMongoDbMetadata = ({
  findContent,
  classifierModel,
}: {
  findContent: FindContentFunc;
  classifierModel: LanguageModel;
}) => {
  const wrappedFindContent: FindContentFunc = wrapTraced(
    async ({ query, filters, limit }) => {
      const { product, programmingLanguage } =
        await classifyMongoDbProgrammingLanguageAndProduct(
          classifierModel,
          query
        );

      // Only include the fields the classifier returned a truthy value for.
      const frontMatterUpdate = {
        ...(product ? { product } : {}),
        ...(programmingLanguage ? { programmingLanguage } : {}),
      };
      const queryWithMetadata = updateFrontMatter(query, frontMatterUpdate);

      return await findContent({ query: queryWithMetadata, filters, limit });
    },
    { name: "makeFindContentWithMongoDbMetadata" }
  );
  return wrappedFindContent;
};
11 changes: 11 additions & 0 deletions packages/mongodb-chatbot-server/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { ObjectId } from "mongodb-rag-core/mongodb";
import { getRequestId, logRequest, sendErrorResponse } from "./utils";
import { CorsOptions } from "cors";
import cloneDeep from "lodash.clonedeep";
import { makeContentRouter, MakeContentRouterParams } from "./routes";

/**
Configuration for the server Express.js app.
Expand All @@ -27,6 +28,11 @@ export interface AppConfig {
*/
conversationsRouterConfig: ConversationsRouterParams;

/**
Configuration for the content router.
If omitted, the content routes are not mounted on the app.
*/
contentRouterConfig?: MakeContentRouterParams;

/**
Maximum time in milliseconds for a request to complete before timing out.
Defaults to 60000 (1 minute).
Expand Down Expand Up @@ -119,6 +125,7 @@ export const makeApp = async (config: AppConfig): Promise<Express> => {
corsOptions,
apiPrefix = DEFAULT_API_PREFIX,
expressAppConfig,
contentRouterConfig,
} = config;
logger.info("Server has the following configuration:");
logger.info(
Expand All @@ -141,6 +148,10 @@ export const makeApp = async (config: AppConfig): Promise<Express> => {
makeConversationsRouter(conversationsRouterConfig)
);

if (contentRouterConfig) {
app.use(`${apiPrefix}/content`, makeContentRouter(contentRouterConfig));
}

app.get("/health", (_req, res) => {
const data = {
uptime: process.uptime(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import request from "supertest";
import { makeTestApp } from "../../test/testHelpers";
import type { MakeContentRouterParams } from "./contentRouter";
import type {
FindContentFunc,
MongoDbSearchResultsStore,
} from "mongodb-rag-core";

// Stub MongoDbSearchResultsStore for tests: every method is a bare jest.fn()
// and the metadata holds placeholder names, so no database is involved.
const mockSearchResultsStore: MongoDbSearchResultsStore = {
drop: jest.fn(),
close: jest.fn(),
metadata: {
databaseName: "mock",
collectionName: "mock",
},
saveSearchResult: jest.fn(),
init: jest.fn(),
};

// findContent stub that always resolves to an empty result: no content and an
// empty query embedding.
const findContentMock = jest.fn().mockResolvedValue({
content: [],
queryEmbedding: [],
}) satisfies FindContentFunc;

/**
  Builds a `contentRouterConfig` for the test app from the shared stubs.
  Fields given in `overrides` replace the stubbed defaults.
 */
function makeContentRouterConfig(
  overrides: Partial<MakeContentRouterParams> = {}
) {
  const defaults = {
    findContent: findContentMock,
    searchResultsStore: mockSearchResultsStore,
  };
  const routerConfig = {
    ...defaults,
    ...overrides,
  } satisfies MakeContentRouterParams;
  return routerConfig;
}

describe("contentRouter", () => {
  const searchEndpoint = "/api/v1/content/search";

  it("should call custom middleware if provided", async () => {
    // Pass-through middleware: records the call and defers to the next handler.
    const passThroughMiddleware = jest.fn((_req, _res, next) => next());
    const contentRouterConfig = makeContentRouterConfig({
      middleware: [passThroughMiddleware],
    });
    const { app, origin } = await makeTestApp({ contentRouterConfig });

    const searchRequest = request(app)
      .post(searchEndpoint)
      .set("req-id", "test-req-id")
      .set("Origin", origin);
    await searchRequest.send({ query: "mongodb" });

    expect(passThroughMiddleware).toHaveBeenCalled();
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { RequestHandler, Router } from "express";
import { ParamsDictionary } from "express-serve-static-core";
import { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core";

import validateRequestSchema from "../../middleware/validateRequestSchema";
import { SearchContentRequest, makeSearchContentRoute } from "./searchContent";

/**
Middleware to put in front of all the routes in the contentRouter.
Useful for authentication, data validation, logging, etc.
It exposes the app's {@link SearchContentRouterLocals} via {@link Response.locals}
([docs](https://expressjs.com/en/api.html#res.locals)).
You can use or modify `res.locals.customData` in your middleware, and this data
will be available to subsequent middleware and route handlers.
*/
export type SearchContentMiddleware = RequestHandler<
ParamsDictionary,
unknown,
unknown,
unknown,
SearchContentRouterLocals
>;

/**
Local variables provided by Express.js for a single request-response cycle.

Keeps track of data for authentication or dynamic data validation.
*/
export interface SearchContentRouterLocals {
/**
Free-form data that middleware can read or modify via `res.locals.customData`.
*/
customData: Record<string, unknown>;
}

/**
Parameters for {@link makeContentRouter}.
*/
export interface MakeContentRouterParams {
/**
Content search function passed to the search content route.
*/
findContent: FindContentFunc;
/**
Search results store passed to the search content route.
*/
searchResultsStore: MongoDbSearchResultsStore;
// TODO: Add default middleware along with customData as in conversationsRouter
/**
Middleware registered on the router ahead of its routes. Defaults to none.
*/
middleware?: SearchContentMiddleware[];
}

/**
  Creates the Express router for the content API.

  Any `middleware` is registered first, in array order, so it runs ahead of
  the routes. The router then exposes `POST /search`, which runs
  `validateRequestSchema(SearchContentRequest)` before the search content
  route built from `findContent` and `searchResultsStore`.
 */
export function makeContentRouter({
  findContent,
  searchResultsStore,
  middleware = [],
}: MakeContentRouterParams) {
  const contentRouter = Router();

  // Add middleware to the contentRouter.
  // `?.` is kept so an explicit `null` from untyped callers is still tolerated.
  middleware?.forEach((handler) => contentRouter.use(handler));

  // Search content.
  contentRouter.post(
    "/search",
    validateRequestSchema(SearchContentRequest),
    makeSearchContentRoute({
      findContent,
      searchResultsStore,
    })
  );

  return contentRouter;
}
2 changes: 2 additions & 0 deletions packages/mongodb-chatbot-server/src/routes/content/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export * from "./contentRouter";
export * from "./searchContent";
Loading