From 96646930b3190172c16c8b8905756f31531da3ae Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 2 Jul 2025 15:21:13 -0400 Subject: [PATCH 01/37] Create MongoDbSearchResultsStore, add limit to DefaultFindContent and add test for limit --- .../MongoDbSearchResultsStore.test.ts | 1 + .../contentStore/MongoDbSearchResultsStore.ts | 68 +++++++++++++++++++ .../src/contentStore/index.ts | 1 + .../findContent/DefaultFindContent.test.ts | 15 +++- .../src/findContent/DefaultFindContent.ts | 2 +- 5 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts create mode 100644 packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts new file mode 100644 index 000000000..4567ae3a2 --- /dev/null +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -0,0 +1 @@ +// TODO: Add tests for MongoDbSearchResultsStore \ No newline at end of file diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts new file mode 100644 index 000000000..871672f9d --- /dev/null +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -0,0 +1,68 @@ +import { z } from "zod"; +import { DatabaseConnection } from "../DatabaseConnection"; +import { + MakeMongoDbDatabaseConnectionParams, + makeMongoDbDatabaseConnection, +} from "../MongoDbDatabaseConnection"; +import { Document } from "mongodb"; + +export const DataSourceSchema = z.object({ + name: z.string(), + type: z.string().optional(), + versionLabel: z.string().optional(), +}); + +export type DataSource = z.infer; + +export interface SearchResultRecord { + query: string; + results: Document[]; + dataSources?: DataSource[]; + limit?: number; + createdAt: Date; +} + +export type SearchResultsStore = DatabaseConnection & { + saveSearchResult(record: SearchResultRecord): Promise; + // Add more methods as needed + metadata: { + databaseName: string; + collectionName: string; + }; + init(): Promise; +}; + +export type MakeMongoDbSearchResultsStoreParams = + MakeMongoDbDatabaseConnectionParams & { + collectionName?: string; + }; + +export type ContentCustomData = Record | undefined; + +export function makeMongoDbSearchResultsStore({ + connectionUri, + databaseName, + collectionName = "searchResults", +}: MakeMongoDbSearchResultsStoreParams): SearchResultsStore { + const { db, drop, close } = makeMongoDbDatabaseConnection({ + connectionUri, + databaseName, + }); + const searchResultsCollection = + db.collection(collectionName); + return { + drop, + close, + metadata: { + databaseName, + collectionName, + }, + async saveSearchResult(record) { + // TODO: implement in EAI-973 + }, + async init() { + await searchResultsCollection.createIndex({ query: 1 }); + await searchResultsCollection.createIndex({ createdAt: -1 }); + }, + }; +} diff --git a/packages/mongodb-rag-core/src/contentStore/index.ts b/packages/mongodb-rag-core/src/contentStore/index.ts index a0be7b876..3de61850a 100644 --- a/packages/mongodb-rag-core/src/contentStore/index.ts +++ b/packages/mongodb-rag-core/src/contentStore/index.ts @@ -2,6 +2,7 @@ export * from "./EmbeddedContent"; export * from "./getChangedPages"; export * from "./MongoDbEmbeddedContentStore"; export * from "./MongoDbPageStore"; +export * from "./MongoDbSearchResultsStore"; export * from "./MongoDbTransformedContentStore"; export * from "./Page"; export * from "./PageFormat"; diff --git a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.test.ts b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.test.ts index 11c122092..8e66a4e15 100644 --- a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.test.ts +++ b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.test.ts @@ -96,6 +96,19 @@ describe("makeDefaultFindContent()", () => { expect(embeddingModelName).toBe(OPENAI_RETRIEVAL_EMBEDDING_DEPLOYMENT); }); test("should limit results", async () => { - // TODO: test behavior + const findContent = makeDefaultFindContent({ + embedder, + store: embeddedContentStore, + findNearestNeighborsOptions: { + minScore: 0.1, // low min, should return at least one result + }, + }); + const query = "MongoDB"; + const { content } = await findContent({ + query, + limit: 1, // limit to 1, should return 1 result + }); + expect(content).toBeDefined(); + expect(content.length).toBe(1); }); }); diff --git a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts index 1f215f1b5..83c5f6d91 100644 --- a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts +++ b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts @@ -28,7 +28,7 @@ export const makeDefaultFindContent = ({ let content = await store.findNearestNeighbors(embedding, { ...findNearestNeighborsOptions, filter: filters, - // TODO: need to add logic to pass limit to findNearestNeighbors + k: limit, }); for (const booster of searchBoosters ?? []) { From f35d9a72f648e9aa743a82b45e2ffd15c089760f Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 3 Jul 2025 11:38:51 -0500 Subject: [PATCH 02/37] Implement saveSearchResult, create MongoDbSearchResultsStore.test.ts --- .../src/routes/content/contentRouter.ts | 5 +- .../src/routes/content/searchContent.ts | 26 ++-- .../MongoDbSearchResultsStore.test.ts | 122 +++++++++++++++++- .../contentStore/MongoDbSearchResultsStore.ts | 40 ++++-- 4 files changed, 162 insertions(+), 31 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 9374dbb7f..7bdf99899 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -1,15 +1,14 @@ import { Router } from "express"; -import { FindContentFunc } from "mongodb-rag-core"; +import { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; import validateRequestSchema from "../../middleware/validateRequestSchema"; import { SearchContentRequest, - SearchResultsStore, makeSearchContentRoute, } from "./searchContent"; export interface MakeContentRouterParams { findContent: FindContentFunc; - searchResultsStore: SearchResultsStore; + searchResultsStore: MongoDbSearchResultsStore; } export function makeContentRouter({ diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index b352ded2f..3d803b1a3 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -1,7 +1,10 @@ import { FindContentFunc, FindContentResult, + MongoDbSearchResultsStore, QueryFilters, + SearchRecordDataSource, + SearchRecordDataSourceSchema, } from "mongodb-rag-core"; import { SomeExpressRequest } from "../../middleware"; import { z } from "zod"; @@ -11,20 +14,9 @@ import { Response as ExpressResponse, } from "express"; -// TODO: need to make this store, as discussed, it should probably be in mongodb-rag-core -export type SearchResultsStore = unknown; - -export const DataSourceSchema = z.object({ - name: z.string(), - type: z.string().optional(), - versionLabel: z.string().optional(), -}); - -export type DataSource = z.infer; - export const SearchContentRequestBody = z.object({ query: z.string(), - dataSources: z.array(DataSourceSchema).optional(), + dataSources: z.array(SearchRecordDataSourceSchema).optional(), limit: z.number().int().min(1).max(500).optional().default(5), }); @@ -42,7 +34,7 @@ export type SearchContentRequestBody = z.infer; export interface MakeSearchContentRouteParams { findContent: FindContentFunc; - searchResultsStore: SearchResultsStore; + searchResultsStore: MongoDbSearchResultsStore; } interface SearchContentResponseChunk { @@ -100,7 +92,9 @@ function mapFindContentResultToSearchContentResponseChunk( }; } -function mapDataSourcesToFilters(dataSources: DataSource[]): QueryFilters { +function mapDataSourcesToFilters( + dataSources: SearchRecordDataSource[] +): QueryFilters { // TODO: implement return {}; } @@ -108,9 +102,9 @@ function mapDataSourcesToFilters(dataSources: DataSource[]): QueryFilters { async function persistSearchResultsToDatabase(params: { query: string; results: FindContentResult; - dataSources: DataSource[]; + dataSources: SearchRecordDataSource[]; limit: number; - searchResultsStore: SearchResultsStore; + searchResultsStore: MongoDbSearchResultsStore; }) { // TODO: implement } diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index 4567ae3a2..eb7a3d316 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -1 +1,121 @@ -// TODO: Add tests for MongoDbSearchResultsStore \ No newline at end of file +import { strict as assert } from "assert"; +import { assertEnvVars } from "../assertEnvVars"; +import { CORE_ENV_VARS } from "../CoreEnvVars"; +import "dotenv/config"; +import { MongoClient } from "mongodb"; +import { MONGO_MEMORY_SERVER_URI } from "../test/constants"; +import { + makeMongoDbSearchResultsStore, + MongoDbSearchResultsStore, + SearchResultRecord, +} from "./MongoDbSearchResultsStore"; + +const { + MONGODB_CONNECTION_URI, + MONGODB_DATABASE_NAME, +} = assertEnvVars(CORE_ENV_VARS); + +describe("MongoDbSearchResultsStore", () => { + let store: MongoDbSearchResultsStore | undefined; + const searchResultRecord: SearchResultRecord = { + query: "What is MongoDB Atlas?", + results: [], + dataSources: [{ name: "source1", type: "docs" }], + createdAt: new Date(), + }; + const uri = MONGO_MEMORY_SERVER_URI; + + beforeAll(async () => { + store = makeMongoDbSearchResultsStore({ + connectionUri: uri, + databaseName: "test-search-content-database", + }); + }); + + afterEach(async () => { + await store?.drop(); + }); + afterAll(async () => { + await store?.close(); + }); + + it("has an overridable default collection name", async () => { + assert(store); + + expect(store.metadata.collectionName).toBe("search_results"); + + const storeWithCustomCollectionName = makeMongoDbSearchResultsStore({ + connectionUri: MONGODB_CONNECTION_URI, + databaseName: store.metadata.databaseName, + collectionName: "custom-search_results", + }); + + expect(storeWithCustomCollectionName.metadata.collectionName).toBe( + "custom-search_results" + ); + }); + + describe("saveSearchResult", () => { + it("saves search result records to db", async () => { + assert(store); + await store.saveSearchResult(searchResultRecord); + + // Check for record in db + const client = new MongoClient(uri); + await client.connect(); + const db = client.db(store.metadata.databaseName); + const collection = db.collection("search_results"); + const found = await collection.findOne(searchResultRecord); + + expect(found).toBeTruthy(); + // Optionally, check specific fields + expect(found?.query).toBe(searchResultRecord.query); + + await client.close(); + }); + it("does NOT save badly formed record", async () => { + assert(store); + const badSearchResultRecord = { + query: "What is aggregation?", + results: [], + dataSources: [{ type: "docs" }], + createdAt: new Date(), + }; + await expect( + // @ts-ignore + store.saveSearchResult(badSearchResultRecord) + ).rejects.toThrow();; + }); + }); +}); + +describe("initializes DB", () => { + let store: MongoDbSearchResultsStore | undefined; + let mongoClient: MongoClient | undefined; + + beforeEach(async () => { + store = makeMongoDbSearchResultsStore({ + connectionUri: MONGODB_CONNECTION_URI, + databaseName: MONGODB_DATABASE_NAME, + }); + mongoClient = new MongoClient(MONGODB_CONNECTION_URI); + }); + + afterEach(async () => { + assert(store); + assert(mongoClient); + await store.close(); + await mongoClient.close(); + }); + + it("creates indexes", async () => { + assert(store); + await store.init(); + + const coll = mongoClient + ?.db(store.metadata.databaseName) + .collection(store.metadata.collectionName); + const indexes = await coll?.listIndexes().toArray(); + expect(indexes?.some((el) => el.name === "createdAt_-1")).toBe(true); + }); +}); diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index 871672f9d..448d8ec75 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -6,29 +6,36 @@ import { } from "../MongoDbDatabaseConnection"; import { Document } from "mongodb"; -export const DataSourceSchema = z.object({ +export const SearchRecordDataSourceSchema = z.object({ name: z.string(), type: z.string().optional(), versionLabel: z.string().optional(), }); -export type DataSource = z.infer; +export type SearchRecordDataSource = z.infer; + +export const SearchResultRecordSchema = z.object({ + query: z.string(), + results: z.array(z.any()), // or a more specific schema + dataSources: z.array(SearchRecordDataSourceSchema).optional(), + limit: z.number().optional(), + createdAt: z.date(), +}); export interface SearchResultRecord { query: string; results: Document[]; - dataSources?: DataSource[]; + dataSources?: SearchRecordDataSource[]; limit?: number; createdAt: Date; } -export type SearchResultsStore = DatabaseConnection & { - saveSearchResult(record: SearchResultRecord): Promise; - // Add more methods as needed +export type MongoDbSearchResultsStore = DatabaseConnection & { metadata: { databaseName: string; collectionName: string; }; + saveSearchResult(record: SearchResultRecord): Promise; init(): Promise; }; @@ -42,8 +49,8 @@ export type ContentCustomData = Record | undefined; export function makeMongoDbSearchResultsStore({ connectionUri, databaseName, - collectionName = "searchResults", -}: MakeMongoDbSearchResultsStoreParams): SearchResultsStore { + collectionName = "search_results", +}: MakeMongoDbSearchResultsStoreParams): MongoDbSearchResultsStore { const { db, drop, close } = makeMongoDbDatabaseConnection({ connectionUri, databaseName, @@ -57,11 +64,22 @@ export function makeMongoDbSearchResultsStore({ databaseName, collectionName, }, - async saveSearchResult(record) { - // TODO: implement in EAI-973 + async saveSearchResult(record: SearchResultRecord) { + SearchResultRecordSchema.parse(record); + const insertResult = await searchResultsCollection.insertOne(record); + + if (!insertResult.acknowledged) { + throw new Error( + "Insert was not acknowledged by MongoDB" + ); + } + if (!insertResult.insertedId) { + throw new Error( + "No insertedId returned from MongoDbSearchResultsStore.saveSearchResult insertOne" + ); + } }, async init() { - await searchResultsCollection.createIndex({ query: 1 }); await searchResultsCollection.createIndex({ createdAt: -1 }); }, }; From b07d9d9824d5ca2ed6e9aa4176592a02dc387b4e Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 3 Jul 2025 12:57:24 -0500 Subject: [PATCH 03/37] lint, format --- .../src/contentStore/MongoDbSearchResultsStore.test.ts | 9 ++++----- .../src/contentStore/MongoDbSearchResultsStore.ts | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index eb7a3d316..2a2e7d070 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -10,10 +10,8 @@ import { SearchResultRecord, } from "./MongoDbSearchResultsStore"; -const { - MONGODB_CONNECTION_URI, - MONGODB_DATABASE_NAME, -} = assertEnvVars(CORE_ENV_VARS); +const { MONGODB_CONNECTION_URI, MONGODB_DATABASE_NAME } = + assertEnvVars(CORE_ENV_VARS); describe("MongoDbSearchResultsStore", () => { let store: MongoDbSearchResultsStore | undefined; @@ -82,9 +80,10 @@ describe("MongoDbSearchResultsStore", () => { createdAt: new Date(), }; await expect( + // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore store.saveSearchResult(badSearchResultRecord) - ).rejects.toThrow();; + ).rejects.toThrow(); }); }); }); diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index 448d8ec75..79cdf9929 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -12,7 +12,9 @@ export const SearchRecordDataSourceSchema = z.object({ versionLabel: z.string().optional(), }); -export type SearchRecordDataSource = z.infer; +export type SearchRecordDataSource = z.infer< + typeof SearchRecordDataSourceSchema +>; export const SearchResultRecordSchema = z.object({ query: z.string(), @@ -69,9 +71,7 @@ export function makeMongoDbSearchResultsStore({ const insertResult = await searchResultsCollection.insertOne(record); if (!insertResult.acknowledged) { - throw new Error( - "Insert was not acknowledged by MongoDB" - ); + throw new Error("Insert was not acknowledged by MongoDB"); } if (!insertResult.insertedId) { throw new Error( From 3a1060bc00d598e89132cbcb0ef24141d40abf27 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 3 Jul 2025 13:00:20 -0500 Subject: [PATCH 04/37] Check entire returned document in MongoDbSearchResultsStore.test.ts --- .../src/contentStore/MongoDbSearchResultsStore.test.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index 2a2e7d070..82adc7002 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -66,8 +66,7 @@ describe("MongoDbSearchResultsStore", () => { const found = await collection.findOne(searchResultRecord); expect(found).toBeTruthy(); - // Optionally, check specific fields - expect(found?.query).toBe(searchResultRecord.query); + expect(found).toMatchObject(searchResultRecord); await client.close(); }); From f62e36c1f7707312a650185d8c32bfedde3ce2c7 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 3 Jul 2025 14:00:26 -0500 Subject: [PATCH 05/37] Create ResultChunk type and zod check --- .../MongoDbSearchResultsStore.test.ts | 11 +++++++- .../contentStore/MongoDbSearchResultsStore.ts | 25 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index 82adc7002..7f7944511 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -17,7 +17,16 @@ describe("MongoDbSearchResultsStore", () => { let store: MongoDbSearchResultsStore | undefined; const searchResultRecord: SearchResultRecord = { query: "What is MongoDB Atlas?", - results: [], + results: [ + { + url: "foo", + title: "bar", + text: "baz", + metadata: { + sourceName: "source", + }, + }, + ], dataSources: [{ name: "source1", type: "docs" }], createdAt: new Date(), }; diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index 79cdf9929..b5ddeb73e 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -16,9 +16,32 @@ export type SearchRecordDataSource = z.infer< typeof SearchRecordDataSourceSchema >; +export interface ResultChunk { + url: string; + title: string; + text: string; + metadata: { + sourceName: string; + sourceType?: string; + tags?: string[]; + [key: string]: any; // Accept additional unknown properties + }; +} + +export const ResultChunkSchema = z.object({ + url: z.string(), + title: z.string(), + text: z.string(), + metadata: z.object({ + sourceName: z.string(), + sourceType: z.string().optional(), + tags: z.array(z.string()).optional(), + }).passthrough(), +}); + export const SearchResultRecordSchema = z.object({ query: z.string(), - results: z.array(z.any()), // or a more specific schema + results: z.array(ResultChunkSchema), dataSources: z.array(SearchRecordDataSourceSchema).optional(), limit: z.number().optional(), createdAt: z.date(), From b6f9aecfd23d06197dabbb97a50f92f91ca0c3eb Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 7 Jul 2025 09:36:16 -0400 Subject: [PATCH 06/37] Correct usage of limit in makeDefaultFindContent --- packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts index 83c5f6d91..572fe10da 100644 --- a/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts +++ b/packages/mongodb-rag-core/src/findContent/DefaultFindContent.ts @@ -28,7 +28,7 @@ export const makeDefaultFindContent = ({ let content = await store.findNearestNeighbors(embedding, { ...findNearestNeighborsOptions, filter: filters, - k: limit, + ...(limit ? { k: limit }: {}) }); for (const booster of searchBoosters ?? []) { From 1c10dfc91b0bb033b86fb153c06428824da16fd3 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 7 Jul 2025 14:52:54 -0400 Subject: [PATCH 07/37] PR feedback: cast badSearchResultRecord as any --- .../src/contentStore/MongoDbSearchResultsStore.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index 7f7944511..fceaf22ff 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -88,9 +88,9 @@ describe("MongoDbSearchResultsStore", () => { createdAt: new Date(), }; await expect( - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore - store.saveSearchResult(badSearchResultRecord) + // Cast as `any` to pass linter (passing bad type purposefully) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + store.saveSearchResult(badSearchResultRecord as any) ).rejects.toThrow(); }); }); From 3a9b937bd1c19e3faea637b51df0430a08992f78 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 7 Jul 2025 15:32:26 -0400 Subject: [PATCH 08/37] Starting structure of searchContent --- .../src/config.ts | 12 +++- .../src/routes/content/searchContent.ts | 56 +++++++++++++++---- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 5bb7fa705..94af89140 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -31,7 +31,7 @@ import { import { redactConnectionUri } from "./middleware/redactConnectionUri"; import path from "path"; import express from "express"; -import { logger } from "mongodb-rag-core"; +import { logger, makeMongoDbSearchResultsStore } from "mongodb-rag-core"; import { wrapOpenAI, wrapTraced, @@ -120,6 +120,11 @@ export const embeddedContentStore = makeMongoDbEmbeddedContentStore({ }, }); +export const searchResultsStore = makeMongoDbSearchResultsStore({ + connectionUri: MONGODB_CONNECTION_URI, + databaseName: MONGODB_DATABASE_NAME, +}); + export const verifiedAnswerConfig = { embeddingModel: OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT, findNearestNeighborsOptions: { @@ -307,6 +312,11 @@ export async function closeDbConnections() { logger.info(`Segment logging is ${segmentConfig ? "enabled" : "disabled"}`); export const config: AppConfig = { + contentRouterConfig: { + // TODO: Its own implementation of findContent... + findContent, + searchResultsStore, + }, conversationsRouterConfig: { middleware: [ blockGetRequests, diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 3d803b1a3..e4f7ff4c2 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -13,6 +13,7 @@ import { Request as ExpressRequest, Response as ExpressResponse, } from "express"; +import { makeRequestError } from "../conversations/utils"; export const SearchContentRequestBody = z.object({ query: z.string(), @@ -41,8 +42,8 @@ interface SearchContentResponseChunk { url: string; title: string; text: string; - metadata: { - sourceName: string; + metadata?: { + sourceName?: string; sourceType?: string; sourceVersionLabel?: string; tags?: string[]; @@ -77,26 +78,55 @@ export function makeSearchContentRoute({ searchResultsStore, }); } catch (error) { - // TODO: error handling + throw makeRequestError({ + httpStatus: 500, + message: "Unable to query search database", + }); } }; } -// TODO: map FindContentResult to SearchContentResponseChunk function mapFindContentResultToSearchContentResponseChunk( result: FindContentResult ): SearchContentResponseBody { - // TODO: return { - results: [], + results: result.content.map(({ url, metadata, text }) => ({ + url, + title: metadata?.pageTitle || "", + text, + metadata, + })), }; } function mapDataSourcesToFilters( - dataSources: SearchRecordDataSource[] + dataSources?: SearchRecordDataSource[] ): QueryFilters { - // TODO: implement - return {}; + if (!dataSources || dataSources.length === 0) { + return {}; + } + + const sourceNames = dataSources.map((ds) => ds.name); + const sourceTypes = dataSources + .map((ds) => ds.type) + .filter((t): t is string => !!t); + const versionLabels = dataSources + .map((ds) => ds.versionLabel) + .filter((v): v is string => !!v); + + const filter: QueryFilters = {}; + + if (sourceNames.length) { + filter.sourceName = { $in: sourceNames }; + } + if (sourceTypes.length) { + filter.sourceType = { $in: sourceTypes }; + } + if (versionLabels.length) { + filter.version = { label: { $in: versionLabels } }; + } + + return filter; } async function persistSearchResultsToDatabase(params: { @@ -106,5 +136,11 @@ async function persistSearchResultsToDatabase(params: { limit: number; searchResultsStore: MongoDbSearchResultsStore; }) { - // TODO: implement + params.searchResultsStore.saveSearchResult({ + query: params.query, + results: params.results.content, + dataSources: params.dataSources, + limit: params.limit, + createdAt: new Date(), + }); } From f479ea07e80dd170ec38b21854b6e1a5ca06110a Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 09:46:20 -0400 Subject: [PATCH 09/37] Use unknown instead of any for ResultChunk additional metadata --- .../src/contentStore/MongoDbSearchResultsStore.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index b5ddeb73e..b451d4482 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -24,7 +24,7 @@ export interface ResultChunk { sourceName: string; sourceType?: string; tags?: string[]; - [key: string]: any; // Accept additional unknown properties + [key: string]: unknown; // Accept additional unknown properties }; } From 4df3c202afdf441b3a93bdbd454f8c4d46ae4ff6 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 10:06:20 -0400 Subject: [PATCH 10/37] Add searchContent test file, broaden QueryFilter & MongoDbAtlasVectorSearchFilter types --- .../src/routes/content/searchContent.test.ts | 159 +++++++++++++++++- .../src/contentStore/EmbeddedContent.ts | 9 +- .../MongoDbEmbeddedContentStore.ts | 6 +- 3 files changed, 160 insertions(+), 14 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts index a0613ea67..f02c4b60a 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts @@ -1,6 +1,155 @@ -// TODO: test the route -describe("searchContent", () => { - it("should search content", () => { - expect(true).toBe(true); +import { makeSearchContentRoute } from "./searchContent"; +import type { FindContentFunc, FindContentResult } from "mongodb-rag-core"; +import type { MongoDbSearchResultsStore } from "mongodb-rag-core"; +import { createRequest, createResponse } from "node-mocks-http"; + +// Helper to create a mock FindContentFunc +function makeMockFindContent(result: FindContentResult) { + return jest.fn().mockResolvedValue(result) satisfies FindContentFunc; +} + +// Helper to create a mock MongoDbSearchResultsStore +function makeMockMongoDbSearchResultsStore() { + return { + drop: jest.fn(), + close: jest.fn(), + metadata: { databaseName: "mock", collectionName: "mock" }, + saveSearchResult: jest.fn().mockResolvedValue(undefined), + init: jest.fn(), + } satisfies MongoDbSearchResultsStore; +} + +describe("makeSearchContentRoute", () => { + const baseReqBody = { + query: "What is aggregation?", + limit: 2, + dataSources: [{ name: "source1", type: "docs", versionLabel: "v1" }], + }; + // Add all required EmbeddedContent fields for the mock result + const baseFindContentResult: FindContentResult = { + queryEmbedding: [0.1, 0.2, 0.3], + content: [ + { + url: "https://www.mongodb.com/docs/manual/aggregation", + text: "Look at all this aggregation", + metadata: { pageTitle: "Aggregation Operations" }, + sourceName: "source1", + tokenCount: 8, + embeddings: { test: [0.1, 0.2, 0.3] }, + updated: new Date(), + score: 0.8, + }, + { + url: "https://mongodb.com/docs", + text: "MongoDB Docs", + metadata: { pageTitle: "MongoDB" }, + sourceName: "source1", + tokenCount: 10, + embeddings: { test: [0.1, 0.2, 0.3] }, + updated: new Date(), + score: 0.6, + }, + ], + }; + + it("should return search results for a valid request", async () => { + const findContent = makeMockFindContent(baseFindContentResult); + const searchResultsStore = makeMockMongoDbSearchResultsStore(); + const handler = makeSearchContentRoute({ findContent, searchResultsStore }); + + const req = createRequest({ + body: baseReqBody, + headers: { "req-id": "test-req-id" }, + }); + const res = createResponse(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await handler(req, res as any); + + const data = res._getJSONData(); + expect(data).toHaveProperty("results"); + expect(Array.isArray(data.results)).toBe(true); + expect(data.results.length).toBe(2); + expect(data.results[0].url).toBe( + "https://www.mongodb.com/docs/manual/aggregation" + ); }); -}); + + it("should call findContent with correct arguments", async () => { + const findContent = jest.fn().mockResolvedValue(baseFindContentResult); + const searchResultsStore = makeMockMongoDbSearchResultsStore(); + const handler = makeSearchContentRoute({ findContent, searchResultsStore }); + const req = createRequest({ + body: baseReqBody, + headers: { "req-id": "test-req-id" }, + }); + const res = createResponse(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await handler(req, res as any); + + expect(findContent).toHaveBeenCalledWith({ + query: baseReqBody.query, + filters: expect.any(Object), + limit: baseReqBody.limit, + }); + }); + + it("should call searchResultsStore.saveSearchResult", async () => { + const findContent = makeMockFindContent(baseFindContentResult); + const searchResultsStore = makeMockMongoDbSearchResultsStore(); + const handler = makeSearchContentRoute({ findContent, searchResultsStore }); + const req = createRequest({ + body: baseReqBody, + headers: { "req-id": "test-req-id" }, + }); + const res = createResponse(); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await handler(req, res as any); + expect(searchResultsStore.saveSearchResult).toHaveBeenCalledWith( + expect.objectContaining({ + query: baseReqBody.query, + results: baseFindContentResult.content, + dataSources: baseReqBody.dataSources, + limit: baseReqBody.limit, + }) + ); + }); + + it("should handle errors from findContent and throw", async () => { + const findContent = jest.fn().mockRejectedValue(new Error("fail")); + const searchResultsStore = makeMockMongoDbSearchResultsStore(); + const handler = makeSearchContentRoute({ findContent, searchResultsStore }); + const req = createRequest({ + body: baseReqBody, + headers: { "req-id": "test-req-id" }, + }); + const res = createResponse(); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await expect(handler(req, res as any)).rejects.toMatchObject({ + message: "Unable to query search database", + httpStatus: 500, + name: "RequestError", + }); + }); + + it("should respect `limit` and `dataSources` parameters", async () => { + const findContent = jest.fn().mockResolvedValue(baseFindContentResult); + const searchResultsStore = makeMockMongoDbSearchResultsStore(); + const handler = makeSearchContentRoute({ findContent, searchResultsStore }); + const req = createRequest({ + body: { ...baseReqBody, limit: 1, dataSources: [{ name: "source2" }] }, + headers: { "req-id": "test-req-id" }, + }); + const res = createResponse(); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await handler(req, res as any); + expect(findContent).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 1, + filters: expect.objectContaining({ sourceName: { $in: ["source2"] } }), + }) + ); + }); +}); \ No newline at end of file diff --git a/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts b/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts index 4bbe948b1..c9d8b828b 100644 --- a/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts +++ b/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts @@ -96,12 +96,9 @@ export interface GetSourcesMatchParams { Filters for querying the embedded content vector store. */ export type QueryFilters = { - sourceName?: string; - version?: { - current?: boolean; - label?: string; - }; - sourceType?: Page["sourceType"]; + sourceName?: string | { $in: string[] }; + version?: { current?: boolean; label?: string | { $in: string[] } }; + sourceType?: Page["sourceType"] | { $in: string[] }; }; /** diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts index 147da6ebc..9297a23b9 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts @@ -294,10 +294,10 @@ export function makeMongoDbEmbeddedContentStore({ } type MongoDbAtlasVectorSearchFilter = { - sourceName?: string; - "metadata.version.label"?: string; + sourceName?: string | { $in: string[] }; + "metadata.version.label"?: string | { $in: string[] }; "metadata.version.isCurrent"?: boolean | { $ne: boolean }; - sourceType?: string; + sourceType?: string | { $in: string[] }; }; const handleFilters = ( From baad5bd60334e11d0f7cbc44e3f73c9579c854b7 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 10:19:31 -0400 Subject: [PATCH 11/37] Work on clarity of comments in contentRouter --- .../src/routes/content/contentRouter.ts | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 7bdf99899..772b784ad 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -1,10 +1,39 @@ -import { Router } from "express"; +import { RequestHandler, Router } from "express"; import { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; import validateRequestSchema from "../../middleware/validateRequestSchema"; import { SearchContentRequest, makeSearchContentRoute, } from "./searchContent"; +import { ParamsDictionary } from "express-serve-static-core"; + +/** + Middleware to put in front of all the routes in the contentRouter. + Useful for authentication, data validation, logging, etc. + It exposes the app's {@link ContentRouterLocals} via {@link Response.locals} + ([docs](https://expressjs.com/en/api.html#res.locals)). + You can use or modify `res.locals.customData` in your middleware, and this data + will be available to subsequent middleware and route handlers. + */ +export type SearchContentMiddleware = RequestHandler< + ParamsDictionary, + unknown, + unknown, + unknown, + ContentRouterLocals +>; + +/** + Local variables provided by Express.js for single request-response cycle + + Keeps track of data for authentication or dynamic data validation. + */ +export interface ContentRouterLocals { + customData: Record; +} + + + export interface MakeContentRouterParams { findContent: FindContentFunc; From b3ad758251ef3ce7ef2e71e795f6f114708ccb9d Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 10:41:44 -0400 Subject: [PATCH 12/37] PR feedback: Combine describe blocks in MongoDbSearchResultsStore.test.ts, remove zod checks where unnecessary in MongoDbSearchResultsStore --- .../MongoDbSearchResultsStore.test.ts | 97 ++++++------------- .../contentStore/MongoDbSearchResultsStore.ts | 13 +-- 2 files changed, 38 insertions(+), 72 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index fceaf22ff..a76b64108 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -1,6 +1,4 @@ import { strict as assert } from "assert"; -import { assertEnvVars } from "../assertEnvVars"; -import { CORE_ENV_VARS } from "../CoreEnvVars"; import "dotenv/config"; import { MongoClient } from "mongodb"; import { MONGO_MEMORY_SERVER_URI } from "../test/constants"; @@ -10,27 +8,25 @@ import { SearchResultRecord, } from "./MongoDbSearchResultsStore"; -const { MONGODB_CONNECTION_URI, MONGODB_DATABASE_NAME } = - assertEnvVars(CORE_ENV_VARS); +const searchResultRecord: SearchResultRecord = { + query: "What is MongoDB Atlas?", + results: [ + { + url: "foo", + title: "bar", + text: "baz", + metadata: { + sourceName: "source", + }, + }, + ], + dataSources: [{ name: "source1", type: "docs" }], + createdAt: new Date(), +}; +const uri = MONGO_MEMORY_SERVER_URI; describe("MongoDbSearchResultsStore", () => { let store: MongoDbSearchResultsStore | undefined; - const searchResultRecord: SearchResultRecord = { - query: "What is MongoDB Atlas?", - results: [ - { - url: "foo", - title: "bar", - text: "baz", - metadata: { - sourceName: "source", - }, - }, - ], - dataSources: [{ name: "source1", type: "docs" }], - createdAt: new Date(), - }; - const uri = MONGO_MEMORY_SERVER_URI; beforeAll(async () => { store = makeMongoDbSearchResultsStore({ @@ -52,7 +48,7 @@ describe("MongoDbSearchResultsStore", () => { expect(store.metadata.collectionName).toBe("search_results"); const storeWithCustomCollectionName = makeMongoDbSearchResultsStore({ - connectionUri: MONGODB_CONNECTION_URI, + connectionUri: uri, databaseName: store.metadata.databaseName, collectionName: "custom-search_results", }); @@ -62,6 +58,20 @@ describe("MongoDbSearchResultsStore", () => { ); }); + it("creates indexes", async () => { + assert(store); + await store.init(); + + const mongoClient = new MongoClient(uri); + const coll = mongoClient + ?.db(store.metadata.databaseName) + .collection(store.metadata.collectionName); + const indexes = await coll?.listIndexes().toArray(); + + expect(indexes?.some((el) => el.name === "createdAt_-1")).toBe(true); + await mongoClient.close(); + }); + describe("saveSearchResult", () => { it("saves search result records to db", async () => { assert(store); @@ -79,50 +89,5 @@ describe("MongoDbSearchResultsStore", () => { await client.close(); }); - it("does NOT save badly formed record", async () => { - assert(store); - const badSearchResultRecord = { - query: "What is aggregation?", - results: [], - dataSources: [{ type: "docs" }], - createdAt: new Date(), - }; - await expect( - // Cast as `any` to pass linter (passing bad type purposefully) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - store.saveSearchResult(badSearchResultRecord as any) - ).rejects.toThrow(); - }); - }); -}); - -describe("initializes DB", () => { - let store: MongoDbSearchResultsStore | undefined; - let mongoClient: MongoClient | undefined; - - beforeEach(async () => { - store = makeMongoDbSearchResultsStore({ - connectionUri: MONGODB_CONNECTION_URI, - databaseName: MONGODB_DATABASE_NAME, - }); - mongoClient = new MongoClient(MONGODB_CONNECTION_URI); - }); - - afterEach(async () => { - assert(store); - assert(mongoClient); - await store.close(); - await mongoClient.close(); - }); - - it("creates indexes", async () => { - assert(store); - await store.init(); - - const coll = mongoClient - ?.db(store.metadata.databaseName) - .collection(store.metadata.collectionName); - const indexes = await coll?.listIndexes().toArray(); - expect(indexes?.some((el) => el.name === "createdAt_-1")).toBe(true); }); }); diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index b451d4482..f9fa77c79 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -32,11 +32,13 @@ export const ResultChunkSchema = z.object({ url: z.string(), title: z.string(), text: z.string(), - metadata: z.object({ - sourceName: z.string(), - sourceType: z.string().optional(), - tags: z.array(z.string()).optional(), - }).passthrough(), + metadata: z + .object({ + sourceName: z.string(), + sourceType: z.string().optional(), + tags: z.array(z.string()).optional(), + }) + .passthrough(), }); export const SearchResultRecordSchema = z.object({ @@ -90,7 +92,6 @@ export function makeMongoDbSearchResultsStore({ collectionName, }, async saveSearchResult(record: SearchResultRecord) { - SearchResultRecordSchema.parse(record); const insertResult = await searchResultsCollection.insertOne(record); if (!insertResult.acknowledged) { From 370bd86cf568dddfd88e536523f1ccdd03399e69 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 11:26:14 -0400 Subject: [PATCH 13/37] Use generics on middleware: requireRequestOrigin, requireValidIpAddress --- .../src/middleware/requireRequestOrigin.ts | 8 ++++++-- .../src/middleware/requireValidIpAddress.ts | 11 +++++++++-- .../src/routes/content/contentRouter.ts | 10 +++++++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts index 1f6567e04..ec507b71e 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts @@ -1,9 +1,13 @@ +import { RequestHandler } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; + import { getRequestId, logRequest, sendErrorResponse } from "../utils"; -import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; export const CUSTOM_REQUEST_ORIGIN_HEADER = "X-Request-Origin"; -export function requireRequestOrigin(): ConversationsMiddleware { +export function requireRequestOrigin< + Locals extends Record +>(): RequestHandler { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts index a4e3914dc..200029cee 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts @@ -1,8 +1,15 @@ +import { RequestHandler } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; + import { getRequestId, logRequest, sendErrorResponse } from "../utils"; -import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; import { isValidIp } from "../routes/conversations/utils"; +import { ConversationsRouterLocals, SearchContentRouterLocals } from "../routes"; + +export type Locals = ConversationsRouterLocals | SearchContentRouterLocals; -export function requireValidIpAddress(): ConversationsMiddleware { +export function requireValidIpAddress< + Locals extends Record +>(): RequestHandler { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 772b784ad..810a28eb9 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -6,6 +6,7 @@ import { makeSearchContentRoute, } from "./searchContent"; import { ParamsDictionary } from "express-serve-static-core"; +import { requireRequestOrigin, requireValidIpAddress } from "../../middleware"; /** Middleware to put in front of all the routes in the contentRouter. @@ -20,7 +21,7 @@ export type SearchContentMiddleware = RequestHandler< unknown, unknown, unknown, - ContentRouterLocals + SearchContentRouterLocals >; /** @@ -28,7 +29,7 @@ export type SearchContentMiddleware = RequestHandler< Keeps track of data for authentication or dynamic data validation. */ -export interface ContentRouterLocals { +export interface SearchContentRouterLocals { customData: Record; } @@ -38,15 +39,18 @@ export interface ContentRouterLocals { export interface MakeContentRouterParams { findContent: FindContentFunc; searchResultsStore: MongoDbSearchResultsStore; + middleware?: SearchContentMiddleware[]; } export function makeContentRouter({ findContent, searchResultsStore, + middleware = [requireValidIpAddress(), requireRequestOrigin()], }: MakeContentRouterParams) { const contentRouter = Router(); - // TODO: add middleware, similar to the conversations router + // Add middleware to the conversationsRouter. + middleware?.forEach((middleware) => contentRouter.use(middleware)); // Create new conversation. contentRouter.post( From a9d1910385fe7029d00001f6a6eb518a3dab8254 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 11:40:50 -0400 Subject: [PATCH 14/37] Structure out contentRouter test file --- .../src/middleware/requireValidIpAddress.ts | 3 - .../src/routes/content/contentRouter.test.ts | 100 +++++++++++++++++- .../src/routes/content/contentRouter.ts | 3 - 3 files changed, 96 insertions(+), 10 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts index 200029cee..c2627894d 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts @@ -3,9 +3,6 @@ import { ParamsDictionary } from "express-serve-static-core"; import { getRequestId, logRequest, sendErrorResponse } from "../utils"; import { isValidIp } from "../routes/conversations/utils"; -import { ConversationsRouterLocals, SearchContentRouterLocals } from "../routes"; - -export type Locals = ConversationsRouterLocals | SearchContentRouterLocals; export function requireValidIpAddress< Locals extends Record diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index 3f09f649b..811ec83bd 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -1,6 +1,98 @@ -// TODO: test the router +import request from "supertest"; +import { makeTestApp } from "../../test/testHelpers"; +import type { MakeContentRouterParams } from "./contentRouter"; +import type { MongoDbSearchResultsStore } from "mongodb-rag-core"; + +// Minimal in-memory mock for SearchResultsStore for testing purposes +const mockSearchResultsStore: MongoDbSearchResultsStore = { + drop: jest.fn(), + close: jest.fn(), + metadata: { + databaseName: "mock", + collectionName: "mock", + }, + saveSearchResult: jest.fn(), + init: jest.fn() +}; + +// Helper to build contentRouterConfig for the test app +function makeContentRouterConfig( + overrides: Partial = {} +) { + return { + findContent: jest + .fn() + .mockResolvedValue({ content: [], queryEmbedding: [] }), + searchResultsStore: mockSearchResultsStore, + ...overrides, + } satisfies MakeContentRouterParams; +} + describe("contentRouter", () => { - it("should create a new conversation", () => { - expect(true).toBe(true); + const searchEndpoint = "/api/v1/content/search"; + + it("should return search results for a valid request", async () => { + const { app, origin } = await makeTestApp({ + contentRouterConfig: makeContentRouterConfig(), + }); + const res = await request(app) + .post(searchEndpoint) + .set("req-id", "test-req-id") + .set("Origin", origin) + .send({ + query: "mongodb", + limit: 2, + }); + expect(res.status).toBe(200); + expect(res.body).toHaveProperty("results"); + expect(Array.isArray(res.body.results)).toBe(true); }); -}); + + it("should return 400 for missing query field", async () => { + const { app, origin } = await makeTestApp({ + contentRouterConfig: makeContentRouterConfig(), + }); + const res = await request(app) + .post(searchEndpoint) + .set("req-id", "test-req-id") + .set("Origin", origin) + .send({}); + + expect(res.body).toHaveProperty("error"); + expect(res.body.error).toBe("Invalid request"); + }); + + it("should call custom middleware if provided", async () => { + const mockMiddleware = jest.fn((_req, _res, next) => next()); + const { app, origin } = await makeTestApp({ + contentRouterConfig: makeContentRouterConfig({ + middleware: [mockMiddleware], + }), + }); + await request(app) + .post(searchEndpoint) + .set("req-id", "test-req-id") + .set("Origin", origin) + .send({ query: "mongodb" }); + expect(mockMiddleware).toHaveBeenCalled(); + }); + + it("should use the 'limit' parameter to not return more results than requested", async () => { + const { app, origin } = await makeTestApp({ + contentRouterConfig: makeContentRouterConfig(), + }); + const limit = 1; + const res = await request(app) + .post(searchEndpoint) + .set("req-id", "test-req-id") + .set("Origin", origin) + .send({ + query: "mongodb", + limit, + }); + expect(res.status).toBe(200); + expect(res.body).toHaveProperty("results"); + expect(Array.isArray(res.body.results)).toBe(true); + expect(res.body.results.length).toBeLessThanOrEqual(limit); + }); +}); \ No newline at end of file diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 810a28eb9..be3119030 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -33,9 +33,6 @@ export interface SearchContentRouterLocals { customData: Record; } - - - export interface MakeContentRouterParams { findContent: FindContentFunc; searchResultsStore: MongoDbSearchResultsStore; From dd3c31646f37b5d394f9151192bc387b973889c4 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 11:43:00 -0400 Subject: [PATCH 15/37] Combine describes --- .../MongoDbSearchResultsStore.test.ts | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts index a76b64108..525fd44ea 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.test.ts @@ -72,22 +72,20 @@ describe("MongoDbSearchResultsStore", () => { await mongoClient.close(); }); - describe("saveSearchResult", () => { - it("saves search result records to db", async () => { - assert(store); - await store.saveSearchResult(searchResultRecord); + it("saves search result records to db", async () => { + assert(store); + await store.saveSearchResult(searchResultRecord); - // Check for record in db - const client = new MongoClient(uri); - await client.connect(); - const db = client.db(store.metadata.databaseName); - const collection = db.collection("search_results"); - const found = await collection.findOne(searchResultRecord); + // Check for record in db + const client = new MongoClient(uri); + await client.connect(); + const db = client.db(store.metadata.databaseName); + const collection = db.collection("search_results"); + const found = await collection.findOne(searchResultRecord); - expect(found).toBeTruthy(); - expect(found).toMatchObject(searchResultRecord); + expect(found).toBeTruthy(); + expect(found).toMatchObject(searchResultRecord); - await client.close(); - }); + await client.close(); }); }); From 5ebefb562328919b105435b32d1fb459dac78068 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 8 Jul 2025 11:52:58 -0400 Subject: [PATCH 16/37] Clean --- .../src/middleware/requireRequestOrigin.ts | 8 ++------ .../src/middleware/requireValidIpAddress.ts | 8 ++------ .../src/routes/content/contentRouter.ts | 3 +-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts index ec507b71e..1f6567e04 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts @@ -1,13 +1,9 @@ -import { RequestHandler } from "express"; -import { ParamsDictionary } from "express-serve-static-core"; - import { getRequestId, logRequest, sendErrorResponse } from "../utils"; +import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; export const CUSTOM_REQUEST_ORIGIN_HEADER = "X-Request-Origin"; -export function requireRequestOrigin< - Locals extends Record ->(): RequestHandler { +export function requireRequestOrigin(): ConversationsMiddleware { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts index c2627894d..a4e3914dc 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts @@ -1,12 +1,8 @@ -import { RequestHandler } from "express"; -import { ParamsDictionary } from "express-serve-static-core"; - import { getRequestId, logRequest, sendErrorResponse } from "../utils"; +import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; import { isValidIp } from "../routes/conversations/utils"; -export function requireValidIpAddress< - Locals extends Record ->(): RequestHandler { +export function requireValidIpAddress(): ConversationsMiddleware { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index be3119030..b98376fbd 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -6,7 +6,6 @@ import { makeSearchContentRoute, } from "./searchContent"; import { ParamsDictionary } from "express-serve-static-core"; -import { requireRequestOrigin, requireValidIpAddress } from "../../middleware"; /** Middleware to put in front of all the routes in the contentRouter. @@ -42,7 +41,7 @@ export interface MakeContentRouterParams { export function makeContentRouter({ findContent, searchResultsStore, - middleware = [requireValidIpAddress(), requireRequestOrigin()], + middleware = [], }: MakeContentRouterParams) { const contentRouter = Router(); From 647144f81ff7a79f275a98b05720de91dfeb219f Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 9 Jul 2025 09:59:13 -0400 Subject: [PATCH 17/37] makeFindContentWithMongoDbMetadata --- .../src/config.ts | 51 +++++++++++++++++-- .../src/routes/content/contentRouter.ts | 9 ++-- .../src/routes/content/searchContent.ts | 11 ++-- .../src/mongoDbMetadata/classifyMetadata.ts | 14 +++++ 4 files changed, 70 insertions(+), 15 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 94af89140..faa47a07a 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -31,7 +31,12 @@ import { import { redactConnectionUri } from "./middleware/redactConnectionUri"; import path from "path"; import express from "express"; -import { logger, makeMongoDbSearchResultsStore } from "mongodb-rag-core"; +import { + FindContentFunc, + logger, + makeMongoDbSearchResultsStore, + updateFrontMatter, +} from "mongodb-rag-core"; import { wrapOpenAI, wrapTraced, @@ -40,7 +45,6 @@ import { import { AzureOpenAI } from "mongodb-rag-core/openai"; import { MongoClient } from "mongodb-rag-core/mongodb"; import { - ANALYZER_ENV_VARS, AZURE_OPENAI_ENV_VARS, PREPROCESSOR_ENV_VARS, TRACING_ENV_VARS, @@ -57,7 +61,8 @@ import { makeGenerateResponseWithSearchTool } from "./processors/generateRespons import { makeBraintrustLogger } from "mongodb-rag-core/braintrust"; import { makeMongoDbScrubbedMessageStore } from "./tracing/scrubbedMessages/MongoDbScrubbedMessageStore"; import { MessageAnalysis } from "./tracing/scrubbedMessages/analyzeMessage"; -import { createAzure } from "mongodb-rag-core/aiSdk"; +import { createAzure, LanguageModel } from "mongodb-rag-core/aiSdk"; +import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; export const { MONGODB_CONNECTION_URI, @@ -156,6 +161,40 @@ embeddedContentStore.findNearestNeighbors = wrapTraced( { name: "findNearestNeighbors" } ); +export const makeFindContentWithMongoDbMetadata = ({ + findContent, + classifierModel, +}: { + findContent: FindContentFunc; + classifierModel: LanguageModel; +}) => { + const wrappedFindContent: FindContentFunc = wrapTraced( + async ({ query, filters, limit }) => { + const { product, programmingLanguage } = + await classifyMongoDbProgrammingLanguageAndProduct( + classifierModel, + query + ); + + const preProcessedQuery = updateFrontMatter(query, { + ...(product ? { product } : {}), + ...(programmingLanguage ? { programmingLanguage } : {}), + }); + + const res = await findContent({ + query: preProcessedQuery, + filters, + limit, + }); + return res; + }, + { + name: "makeFindContentWithMongoDbMetadata", + } + ); + return wrappedFindContent; +}; + export const findContent = wrapTraced( makeDefaultFindContent({ embedder, @@ -313,8 +352,10 @@ logger.info(`Segment logging is ${segmentConfig ? "enabled" : "disabled"}`); export const config: AppConfig = { contentRouterConfig: { - // TODO: Its own implementation of findContent... - findContent, + findContent: makeFindContentWithMongoDbMetadata({ + findContent, + classifierModel: languageModel, + }), searchResultsStore, }, conversationsRouterConfig: { diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index b98376fbd..a3dc8d280 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -1,11 +1,9 @@ import { RequestHandler, Router } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; import { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; + import validateRequestSchema from "../../middleware/validateRequestSchema"; -import { - SearchContentRequest, - makeSearchContentRoute, -} from "./searchContent"; -import { ParamsDictionary } from "express-serve-static-core"; +import { SearchContentRequest, makeSearchContentRoute } from "./searchContent"; /** Middleware to put in front of all the routes in the contentRouter. @@ -35,6 +33,7 @@ export interface SearchContentRouterLocals { export interface MakeContentRouterParams { findContent: FindContentFunc; searchResultsStore: MongoDbSearchResultsStore; + // TODO: Add default middleware as in conversationsRouter middleware?: SearchContentMiddleware[]; } diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index e4f7ff4c2..cfc3c2e1b 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -1,3 +1,7 @@ +import { + Request as ExpressRequest, + Response as ExpressResponse, +} from "express"; import { FindContentFunc, FindContentResult, @@ -6,13 +10,10 @@ import { SearchRecordDataSource, SearchRecordDataSourceSchema, } from "mongodb-rag-core"; -import { SomeExpressRequest } from "../../middleware"; import { z } from "zod"; + +import { SomeExpressRequest } from "../../middleware"; import { ConversationsRouterLocals } from "../conversations"; -import { - Request as ExpressRequest, - Response as ExpressResponse, -} from "express"; import { makeRequestError } from "../conversations/utils"; export const SearchContentRequestBody = z.object({ diff --git a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts index 3ba058b3b..067595d09 100644 --- a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts +++ b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts @@ -135,6 +135,20 @@ ${mongoDbTopics function nullOnErr() { return null; } + +export const classifyMongoDbProgrammingLanguageAndProduct = wrapTraced( + async (model: LanguageModel, data: string, maxRetries?: number) => { + const [programmingLanguage, product] = await Promise.all([ + classifyMongoDbProgrammingLanguage(model, data, maxRetries).catch( + nullOnErr + ), + classifyMongoDbProduct(model, data, maxRetries).catch(nullOnErr), + ]); + return { programmingLanguage, product }; + }, + { name: "classifyMongoDbProgrammingLanguageAndProduct" } +); + export const classifyMongoDbMetadata = wrapTraced( async (model: LanguageModel, data: string, maxRetries?: number) => { const [programmingLanguage, product, topic] = await Promise.all([ From c65e8e6fabfedca7106cc2488739e0d59fe9a285 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 9 Jul 2025 10:48:28 -0400 Subject: [PATCH 18/37] config.test.ts --- .../src/config.test.ts | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 packages/chatbot-server-mongodb-public/src/config.test.ts diff --git a/packages/chatbot-server-mongodb-public/src/config.test.ts b/packages/chatbot-server-mongodb-public/src/config.test.ts new file mode 100644 index 000000000..41a0ce074 --- /dev/null +++ b/packages/chatbot-server-mongodb-public/src/config.test.ts @@ -0,0 +1,91 @@ +import { makeFindContentWithMongoDbMetadata } from "./config"; + +// Mocks + +jest.mock("mongodb-rag-core/mongoDbMetadata", () => { + const actual = jest.requireActual("mongodb-rag-core/mongoDbMetadata"); + return { + ...actual, + classifyMongoDbProgrammingLanguageAndProduct: jest.fn(), + }; +}); + +jest.mock("mongodb-rag-core", () => { + const actual = jest.requireActual("mongodb-rag-core"); + return { + ...actual, + updateFrontMatter: jest.fn(), + }; +}); + +jest.mock("mongodb-rag-core/braintrust", () => { + const actual = jest.requireActual("mongodb-rag-core/braintrust"); + return { + ...actual, + wrapTraced: jest.fn(), + }; +}); + +import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; +import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; +import { wrapTraced } from "mongodb-rag-core/braintrust"; + +const mockedClassify = + classifyMongoDbProgrammingLanguageAndProduct as jest.Mock; +const mockedUpdateFrontMatter = updateFrontMatter as jest.Mock; +const mockedWrapTraced = wrapTraced as jest.Mock; + +function makeMockFindContent(result: string[]): FindContentFunc { + return jest.fn().mockResolvedValue(result); +} + +afterEach(() => { + jest.resetAllMocks(); +}); + +describe("makeFindContentWithMongoDbMetadata", () => { + test("enhances query with front matter and classification", async () => { + const inputQuery = "How do I use MongoDB with TypeScript?"; + const expectedQuery = `--- + product: driver + programmingLanguage: typescript + --- + How do I use MongoDB with TypeScript?`; + const fakeResult = ["doc1", "doc2"]; + + mockedClassify.mockResolvedValue({ + product: "driver", + programmingLanguage: "typescript", + }); + mockedUpdateFrontMatter.mockReturnValue(expectedQuery); + mockedWrapTraced.mockImplementation((fn) => fn); + + const findContentMock = makeMockFindContent(fakeResult); + + const wrappedFindContent = makeFindContentWithMongoDbMetadata({ + findContent: findContentMock, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + classifierModel: {} as any, + }); + + const result = await wrappedFindContent({ + query: inputQuery, + filters: { sourceName: { $in: ["docs"] } }, + limit: 3, + }); + + expect(mockedClassify).toHaveBeenCalledWith(expect.anything(), inputQuery); + expect(mockedUpdateFrontMatter).toHaveBeenCalledWith(inputQuery, { + product: "driver", + programmingLanguage: "typescript", + }); + + expect(findContentMock).toHaveBeenCalledWith({ + query: expectedQuery, + filters: { sourceName: { $in: ["docs"] } }, + limit: 3, + }); + + expect(result).toEqual(fakeResult); + }); +}); From 147aea096f697716a93a07c84e22dc1c30597ea2 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 9 Jul 2025 10:58:37 -0400 Subject: [PATCH 19/37] Clean --- .../src/routes/content/searchContent.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index cfc3c2e1b..5f1731530 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -13,8 +13,8 @@ import { import { z } from "zod"; import { SomeExpressRequest } from "../../middleware"; -import { ConversationsRouterLocals } from "../conversations"; import { makeRequestError } from "../conversations/utils"; +import { SearchContentRouterLocals } from "./contentRouter"; export const SearchContentRequestBody = z.object({ query: z.string(), @@ -61,7 +61,7 @@ export function makeSearchContentRoute({ }: MakeSearchContentRouteParams) { return async ( req: ExpressRequest, - res: ExpressResponse + res: ExpressResponse ) => { try { const { query, dataSources, limit } = req.body; From 5dabbf6ce1efe75274d6684773c264bda4e7ee26 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 10 Jul 2025 14:35:33 -0400 Subject: [PATCH 20/37] PR feedback --- .../src/config.ts | 40 +------------- .../findContentWithMongoDbMetadata.test.ts} | 9 +-- .../findContentWithMongoDbMetadata.ts | 55 +++++++++++++++++++ .../src/routes/content/contentRouter.test.ts | 17 ++++-- .../src/routes/content/searchContent.ts | 2 +- .../src/contentStore/EmbeddedContent.ts | 6 +- .../MongoDbEmbeddedContentStore.ts | 10 +++- .../src/mongoDbMetadata/classifyMetadata.ts | 13 ----- .../MongoDbVerifiedAnswerStore.ts | 1 + 9 files changed, 83 insertions(+), 70 deletions(-) rename packages/chatbot-server-mongodb-public/src/{config.test.ts => processors/findContentWithMongoDbMetadata.test.ts} (89%) create mode 100644 packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index faa47a07a..892e4a608 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -32,11 +32,10 @@ import { redactConnectionUri } from "./middleware/redactConnectionUri"; import path from "path"; import express from "express"; import { - FindContentFunc, logger, makeMongoDbSearchResultsStore, - updateFrontMatter, } from "mongodb-rag-core"; +import { createAzure } from "mongodb-rag-core/aiSdk"; import { wrapOpenAI, wrapTraced, @@ -61,8 +60,7 @@ import { makeGenerateResponseWithSearchTool } from "./processors/generateRespons import { makeBraintrustLogger } from "mongodb-rag-core/braintrust"; import { makeMongoDbScrubbedMessageStore } from "./tracing/scrubbedMessages/MongoDbScrubbedMessageStore"; import { MessageAnalysis } from "./tracing/scrubbedMessages/analyzeMessage"; -import { createAzure, LanguageModel } from "mongodb-rag-core/aiSdk"; -import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; +import { makeFindContentWithMongoDbMetadata } from "./processors/findContentWithMongoDbMetadata"; export const { MONGODB_CONNECTION_URI, @@ -161,40 +159,6 @@ embeddedContentStore.findNearestNeighbors = wrapTraced( { name: "findNearestNeighbors" } ); -export const makeFindContentWithMongoDbMetadata = ({ - findContent, - classifierModel, -}: { - findContent: FindContentFunc; - classifierModel: LanguageModel; -}) => { - const wrappedFindContent: FindContentFunc = wrapTraced( - async ({ query, filters, limit }) => { - const { product, programmingLanguage } = - await classifyMongoDbProgrammingLanguageAndProduct( - classifierModel, - query - ); - - const preProcessedQuery = updateFrontMatter(query, { - ...(product ? { product } : {}), - ...(programmingLanguage ? { programmingLanguage } : {}), - }); - - const res = await findContent({ - query: preProcessedQuery, - filters, - limit, - }); - return res; - }, - { - name: "makeFindContentWithMongoDbMetadata", - } - ); - return wrappedFindContent; -}; - export const findContent = wrapTraced( makeDefaultFindContent({ embedder, diff --git a/packages/chatbot-server-mongodb-public/src/config.test.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts similarity index 89% rename from packages/chatbot-server-mongodb-public/src/config.test.ts rename to packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts index 41a0ce074..f4b3f76d9 100644 --- a/packages/chatbot-server-mongodb-public/src/config.test.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts @@ -1,9 +1,6 @@ -import { makeFindContentWithMongoDbMetadata } from "./config"; - // Mocks - -jest.mock("mongodb-rag-core/mongoDbMetadata", () => { - const actual = jest.requireActual("mongodb-rag-core/mongoDbMetadata"); +jest.mock("./findContentWithMongoDbMetadata", () => { + const actual = jest.requireActual("./findContentWithMongoDbMetadata"); return { ...actual, classifyMongoDbProgrammingLanguageAndProduct: jest.fn(), @@ -26,9 +23,9 @@ jest.mock("mongodb-rag-core/braintrust", () => { }; }); -import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; import { wrapTraced } from "mongodb-rag-core/braintrust"; +import { classifyMongoDbProgrammingLanguageAndProduct, makeFindContentWithMongoDbMetadata } from "./findContentWithMongoDbMetadata"; const mockedClassify = classifyMongoDbProgrammingLanguageAndProduct as jest.Mock; diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts new file mode 100644 index 000000000..eebef34b1 --- /dev/null +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts @@ -0,0 +1,55 @@ +import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; +import { LanguageModel } from "mongodb-rag-core/aiSdk"; +import { wrapTraced } from "mongodb-rag-core/braintrust"; +import { classifyMongoDbProduct, classifyMongoDbProgrammingLanguage } from "mongodb-rag-core/mongoDbMetadata"; + +function nullOnErr() { + return null; +} + +export const classifyMongoDbProgrammingLanguageAndProduct = wrapTraced( + async (model: LanguageModel, data: string, maxRetries?: number) => { + const [programmingLanguage, product] = await Promise.all([ + classifyMongoDbProgrammingLanguage(model, data, maxRetries).catch( + nullOnErr + ), + classifyMongoDbProduct(model, data, maxRetries).catch(nullOnErr), + ]); + return { programmingLanguage, product }; + }, + { name: "classifyMongoDbProgrammingLanguageAndProduct" } +); + +export const makeFindContentWithMongoDbMetadata = ({ + findContent, + classifierModel, +}: { + findContent: FindContentFunc; + classifierModel: LanguageModel; +}) => { + const wrappedFindContent: FindContentFunc = wrapTraced( + async ({ query, filters, limit }) => { + const { product, programmingLanguage } = + await classifyMongoDbProgrammingLanguageAndProduct( + classifierModel, + query + ); + + const preProcessedQuery = updateFrontMatter(query, { + ...(product ? { product } : {}), + ...(programmingLanguage ? { programmingLanguage } : {}), + }); + + const res = await findContent({ + query: preProcessedQuery, + filters, + limit, + }); + return res; + }, + { + name: "makeFindContentWithMongoDbMetadata", + } + ); + return wrappedFindContent; +}; diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index 811ec83bd..6b1fad4af 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -1,7 +1,7 @@ import request from "supertest"; import { makeTestApp } from "../../test/testHelpers"; import type { MakeContentRouterParams } from "./contentRouter"; -import type { MongoDbSearchResultsStore } from "mongodb-rag-core"; +import type { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; // Minimal in-memory mock for SearchResultsStore for testing purposes const mockSearchResultsStore: MongoDbSearchResultsStore = { @@ -15,14 +15,17 @@ const mockSearchResultsStore: MongoDbSearchResultsStore = { init: jest.fn() }; +const findContentMock = jest.fn().mockResolvedValue({ + content: [], + queryEmbedding: [], +}) satisfies FindContentFunc; + // Helper to build contentRouterConfig for the test app function makeContentRouterConfig( overrides: Partial = {} ) { return { - findContent: jest - .fn() - .mockResolvedValue({ content: [], queryEmbedding: [] }), + findContent: findContentMock, searchResultsStore: mockSearchResultsStore, ...overrides, } satisfies MakeContentRouterParams; @@ -77,7 +80,7 @@ describe("contentRouter", () => { expect(mockMiddleware).toHaveBeenCalled(); }); - it("should use the 'limit' parameter to not return more results than requested", async () => { + it("should pass the 'limit' parameter to findContent", async () => { const { app, origin } = await makeTestApp({ contentRouterConfig: makeContentRouterConfig(), }); @@ -93,6 +96,8 @@ describe("contentRouter", () => { expect(res.status).toBe(200); expect(res.body).toHaveProperty("results"); expect(Array.isArray(res.body.results)).toBe(true); - expect(res.body.results.length).toBeLessThanOrEqual(limit); + expect(findContentMock).toHaveBeenCalledWith( + expect.objectContaining({ limit }) + ); }); }); \ No newline at end of file diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 5f1731530..f3acbf446 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -93,7 +93,7 @@ function mapFindContentResultToSearchContentResponseChunk( return { results: result.content.map(({ url, metadata, text }) => ({ url, - title: metadata?.pageTitle || "", + title: metadata?.pageTitle ?? "", text, metadata, })), diff --git a/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts b/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts index c9d8b828b..18cc19c9d 100644 --- a/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts +++ b/packages/mongodb-rag-core/src/contentStore/EmbeddedContent.ts @@ -96,9 +96,9 @@ export interface GetSourcesMatchParams { Filters for querying the embedded content vector store. */ export type QueryFilters = { - sourceName?: string | { $in: string[] }; - version?: { current?: boolean; label?: string | { $in: string[] } }; - sourceType?: Page["sourceType"] | { $in: string[] }; + sourceName?: string | string[]; + version?: { current?: boolean; label?: string | string[] }; + sourceType?: Page["sourceType"] | string[]; }; /** diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts index 9297a23b9..3ce3dc3d3 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts @@ -305,15 +305,19 @@ const handleFilters = ( ): MongoDbAtlasVectorSearchFilter => { const vectorSearchFilter: MongoDbAtlasVectorSearchFilter = {}; if (filter.sourceName) { - vectorSearchFilter["sourceName"] = filter.sourceName; + vectorSearchFilter["sourceName"] = Array.isArray(filter.sourceName) + ? { $in: filter.sourceName } + : filter.sourceName; } if (filter.sourceType) { - vectorSearchFilter["sourceType"] = filter.sourceType; + vectorSearchFilter["sourceType"] = Array.isArray(filter.sourceType) + ? { $in: filter.sourceType } + : filter.sourceType; } // Handle version filter. Note: unversioned embeddings (isCurrent: null) are treated as current const { current, label } = filter.version ?? {}; if (label) { - vectorSearchFilter["metadata.version.label"] = label; + vectorSearchFilter["metadata.version.label"] = Array.isArray(label) ? { $in: label } : label; } // Return current embeddings if either: // 1. current=true was explicitly requested, or diff --git a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts index 067595d09..f5d0e003d 100644 --- a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts +++ b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts @@ -136,19 +136,6 @@ function nullOnErr() { return null; } -export const classifyMongoDbProgrammingLanguageAndProduct = wrapTraced( - async (model: LanguageModel, data: string, maxRetries?: number) => { - const [programmingLanguage, product] = await Promise.all([ - classifyMongoDbProgrammingLanguage(model, data, maxRetries).catch( - nullOnErr - ), - classifyMongoDbProduct(model, data, maxRetries).catch(nullOnErr), - ]); - return { programmingLanguage, product }; - }, - { name: "classifyMongoDbProgrammingLanguageAndProduct" } -); - export const classifyMongoDbMetadata = wrapTraced( async (model: LanguageModel, data: string, maxRetries?: number) => { const [programmingLanguage, product, topic] = await Promise.all([ diff --git a/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts b/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts index 53f464da2..05af59a4d 100644 --- a/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts +++ b/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts @@ -51,6 +51,7 @@ export function makeMongoDbVerifiedAnswerStore({ path, limit: k, numCandidates: numCandidates ?? k * 15, + // Do I need to use handleFilters(filter) here? Now that filter could have arrays of strings? filter, }, }, From 6f377f509b403122ebb4bd2f2bcd92ca4eb3c949 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 10 Jul 2025 14:45:39 -0400 Subject: [PATCH 21/37] Correct types --- .../src/routes/content/searchContent.ts | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index f3acbf446..7de4a2a89 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -115,19 +115,11 @@ function mapDataSourcesToFilters( .map((ds) => ds.versionLabel) .filter((v): v is string => !!v); - const filter: QueryFilters = {}; - - if (sourceNames.length) { - filter.sourceName = { $in: sourceNames }; - } - if (sourceTypes.length) { - filter.sourceType = { $in: sourceTypes }; - } - if (versionLabels.length) { - filter.version = { label: { $in: versionLabels } }; - } - - return filter; + return { + ...(sourceNames.length && { sourceName: sourceNames }), + ...(sourceTypes.length && { sourceType: sourceTypes }), + ...(versionLabels.length && { version: { label: versionLabels } }), + }; } async function persistSearchResultsToDatabase(params: { From 31710a4660efc39caee6ee042e9e1848cfc2f469 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 10 Jul 2025 15:03:48 -0400 Subject: [PATCH 22/37] Correct test --- .../src/routes/content/searchContent.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts index f02c4b60a..633839f47 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts @@ -148,7 +148,7 @@ describe("makeSearchContentRoute", () => { expect(findContent).toHaveBeenCalledWith( expect.objectContaining({ limit: 1, - filters: expect.objectContaining({ sourceName: { $in: ["source2"] } }), + filters: expect.objectContaining({ sourceName: ["source2"] }), }) ); }); From fd6649825b9c4083e3f1238d9b6f9585b315377a Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 09:43:52 -0400 Subject: [PATCH 23/37] Fix test return type --- .../src/processors/findContentWithMongoDbMetadata.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts index f4b3f76d9..9f77f024b 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts @@ -67,7 +67,7 @@ describe("makeFindContentWithMongoDbMetadata", () => { const result = await wrappedFindContent({ query: inputQuery, - filters: { sourceName: { $in: ["docs"] } }, + filters: { sourceName: ["docs"] }, limit: 3, }); @@ -79,7 +79,7 @@ describe("makeFindContentWithMongoDbMetadata", () => { expect(findContentMock).toHaveBeenCalledWith({ query: expectedQuery, - filters: { sourceName: { $in: ["docs"] } }, + filters: { sourceName: ["docs"] }, limit: 3, }); From 2f1df271f724e848e584ad1c4498fb0a53896ade Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 09:47:58 -0400 Subject: [PATCH 24/37] lint --- packages/chatbot-server-mongodb-public/src/config.ts | 5 +---- .../processors/findContentWithMongoDbMetadata.test.ts | 5 ++++- .../src/processors/findContentWithMongoDbMetadata.ts | 5 ++++- .../src/routes/content/contentRouter.test.ts | 9 ++++++--- .../src/routes/content/searchContent.test.ts | 2 +- .../src/contentStore/MongoDbEmbeddedContentStore.ts | 4 +++- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 892e4a608..3ee981c03 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -31,10 +31,7 @@ import { import { redactConnectionUri } from "./middleware/redactConnectionUri"; import path from "path"; import express from "express"; -import { - logger, - makeMongoDbSearchResultsStore, -} from "mongodb-rag-core"; +import { logger, makeMongoDbSearchResultsStore } from "mongodb-rag-core"; import { createAzure } from "mongodb-rag-core/aiSdk"; import { wrapOpenAI, diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts index 9f77f024b..e89d2e1d5 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts @@ -25,7 +25,10 @@ jest.mock("mongodb-rag-core/braintrust", () => { import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; import { wrapTraced } from "mongodb-rag-core/braintrust"; -import { classifyMongoDbProgrammingLanguageAndProduct, makeFindContentWithMongoDbMetadata } from "./findContentWithMongoDbMetadata"; +import { + classifyMongoDbProgrammingLanguageAndProduct, + makeFindContentWithMongoDbMetadata, +} from "./findContentWithMongoDbMetadata"; const mockedClassify = classifyMongoDbProgrammingLanguageAndProduct as jest.Mock; diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts index eebef34b1..6528c883b 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts @@ -1,7 +1,10 @@ import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; import { LanguageModel } from "mongodb-rag-core/aiSdk"; import { wrapTraced } from "mongodb-rag-core/braintrust"; -import { classifyMongoDbProduct, classifyMongoDbProgrammingLanguage } from "mongodb-rag-core/mongoDbMetadata"; +import { + classifyMongoDbProduct, + classifyMongoDbProgrammingLanguage, +} from "mongodb-rag-core/mongoDbMetadata"; function nullOnErr() { return null; diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index 6b1fad4af..c9c984dbf 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -1,7 +1,10 @@ import request from "supertest"; import { makeTestApp } from "../../test/testHelpers"; import type { MakeContentRouterParams } from "./contentRouter"; -import type { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; +import type { + FindContentFunc, + MongoDbSearchResultsStore, +} from "mongodb-rag-core"; // Minimal in-memory mock for SearchResultsStore for testing purposes const mockSearchResultsStore: MongoDbSearchResultsStore = { @@ -12,7 +15,7 @@ const mockSearchResultsStore: MongoDbSearchResultsStore = { collectionName: "mock", }, saveSearchResult: jest.fn(), - init: jest.fn() + init: jest.fn(), }; const findContentMock = jest.fn().mockResolvedValue({ @@ -100,4 +103,4 @@ describe("contentRouter", () => { expect.objectContaining({ limit }) ); }); -}); \ No newline at end of file +}); diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts index 633839f47..b2ca79963 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts @@ -152,4 +152,4 @@ describe("makeSearchContentRoute", () => { }) ); }); -}); \ No newline at end of file +}); diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts index 3ce3dc3d3..fdecea7ab 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts @@ -317,7 +317,9 @@ const handleFilters = ( // Handle version filter. Note: unversioned embeddings (isCurrent: null) are treated as current const { current, label } = filter.version ?? {}; if (label) { - vectorSearchFilter["metadata.version.label"] = Array.isArray(label) ? { $in: label } : label; + vectorSearchFilter["metadata.version.label"] = Array.isArray(label) + ? { $in: label } + : label; } // Return current embeddings if either: // 1. current=true was explicitly requested, or From 8efc4c68b0abc74ecd83c021b69b86304cb8c6b0 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 10:28:09 -0400 Subject: [PATCH 25/37] Revert move of classifyMongoDbProgrammingLanguageAndProduct, jest needs function outside of file to mock --- .../findContentWithMongoDbMetadata.test.ts | 7 +++--- .../findContentWithMongoDbMetadata.ts | 22 +------------------ .../src/mongoDbMetadata/classifyMetadata.ts | 13 +++++++++++ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts index e89d2e1d5..2c89375fb 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts @@ -1,6 +1,6 @@ // Mocks -jest.mock("./findContentWithMongoDbMetadata", () => { - const actual = jest.requireActual("./findContentWithMongoDbMetadata"); +jest.mock("mongodb-rag-core/mongoDbMetadata", () => { + const actual = jest.requireActual("mongodb-rag-core/mongoDbMetadata"); return { ...actual, classifyMongoDbProgrammingLanguageAndProduct: jest.fn(), @@ -26,9 +26,10 @@ jest.mock("mongodb-rag-core/braintrust", () => { import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; import { wrapTraced } from "mongodb-rag-core/braintrust"; import { - classifyMongoDbProgrammingLanguageAndProduct, makeFindContentWithMongoDbMetadata, } from "./findContentWithMongoDbMetadata"; +import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; + const mockedClassify = classifyMongoDbProgrammingLanguageAndProduct as jest.Mock; diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts index 6528c883b..a08d9f21f 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts @@ -1,27 +1,7 @@ import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; import { LanguageModel } from "mongodb-rag-core/aiSdk"; import { wrapTraced } from "mongodb-rag-core/braintrust"; -import { - classifyMongoDbProduct, - classifyMongoDbProgrammingLanguage, -} from "mongodb-rag-core/mongoDbMetadata"; - -function nullOnErr() { - return null; -} - -export const classifyMongoDbProgrammingLanguageAndProduct = wrapTraced( - async (model: LanguageModel, data: string, maxRetries?: number) => { - const [programmingLanguage, product] = await Promise.all([ - classifyMongoDbProgrammingLanguage(model, data, maxRetries).catch( - nullOnErr - ), - classifyMongoDbProduct(model, data, maxRetries).catch(nullOnErr), - ]); - return { programmingLanguage, product }; - }, - { name: "classifyMongoDbProgrammingLanguageAndProduct" } -); +import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/mongoDbMetadata"; export const makeFindContentWithMongoDbMetadata = ({ findContent, diff --git a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts index f5d0e003d..067595d09 100644 --- a/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts +++ b/packages/mongodb-rag-core/src/mongoDbMetadata/classifyMetadata.ts @@ -136,6 +136,19 @@ function nullOnErr() { return null; } +export const classifyMongoDbProgrammingLanguageAndProduct = wrapTraced( + async (model: LanguageModel, data: string, maxRetries?: number) => { + const [programmingLanguage, product] = await Promise.all([ + classifyMongoDbProgrammingLanguage(model, data, maxRetries).catch( + nullOnErr + ), + classifyMongoDbProduct(model, data, maxRetries).catch(nullOnErr), + ]); + return { programmingLanguage, product }; + }, + { name: "classifyMongoDbProgrammingLanguageAndProduct" } +); + export const classifyMongoDbMetadata = wrapTraced( async (model: LanguageModel, data: string, maxRetries?: number) => { const [programmingLanguage, product, topic] = await Promise.all([ From 2c9a21be6c1183ad5c38014c2ea961e3cd087e63 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 13:28:15 -0400 Subject: [PATCH 26/37] Created addCustomData.ts, generics, use in both contentRouter and conversationRouter --- .../src/middleware/requireRequestOrigin.ts | 8 +- .../src/middleware/requireValidIpAddress.ts | 8 +- .../src/processors/addCustomData.ts | 91 +++++++++++++++ .../src/processors/index.ts | 1 + .../src/routes/content/contentRouter.ts | 31 +++++- .../src/routes/content/searchContent.ts | 29 ++++- .../conversations/addMessageToConversation.ts | 2 +- .../conversations/conversationsRouter.ts | 104 +----------------- .../conversations/createConversation.ts | 2 +- .../src/conversations/ConversationsService.ts | 3 +- 10 files changed, 165 insertions(+), 114 deletions(-) create mode 100644 packages/mongodb-chatbot-server/src/processors/addCustomData.ts diff --git a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts index 1f6567e04..61e6b68b7 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireRequestOrigin.ts @@ -1,9 +1,13 @@ +import { RequestHandler } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; +import { ParsedQs } from "qs"; import { getRequestId, logRequest, sendErrorResponse } from "../utils"; -import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; export const CUSTOM_REQUEST_ORIGIN_HEADER = "X-Request-Origin"; -export function requireRequestOrigin(): ConversationsMiddleware { +export function requireRequestOrigin< + Locals extends Record +>(): RequestHandler { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts index a4e3914dc..41dd77f76 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.ts @@ -1,8 +1,12 @@ +import { RequestHandler } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; +import { ParsedQs } from "qs"; import { getRequestId, logRequest, sendErrorResponse } from "../utils"; -import { ConversationsMiddleware } from "../routes/conversations/conversationsRouter"; import { isValidIp } from "../routes/conversations/utils"; -export function requireValidIpAddress(): ConversationsMiddleware { +export function requireValidIpAddress< + Locals extends Record +>(): RequestHandler { return (req, res, next) => { const reqId = getRequestId(req); diff --git a/packages/mongodb-chatbot-server/src/processors/addCustomData.ts b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts new file mode 100644 index 000000000..b8f88e802 --- /dev/null +++ b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts @@ -0,0 +1,91 @@ +import { Request, Response } from "express"; + +export type RequestCustomData = Record | undefined; + +/** + Function to add custom data to the {@link Conversation} or content search Request persisted to the database. + Has access to the Express.js request and response plus the values + from the {@link Response.locals} object. + */ +export type AddCustomDataFunc = ( + request: Req, + response: Res +) => Promise; + +const addIpToCustomData: AddCustomDataFunc = async (req) => + req.ip + ? { + ip: req.ip, + } + : undefined; + +const addOriginToCustomData: AddCustomDataFunc = async (_, res) => + res.locals.customData.origin + ? { + origin: res.locals.customData.origin, + } + : undefined; + +export const originCodes = [ + "LEARN", + "DEVELOPER", + "DOCS", + "DOTCOM", + "GEMINI_CODE_ASSIST", + "VSCODE", + "OTHER", +] as const; + +export type OriginCode = (typeof originCodes)[number]; + +interface OriginRule { + regex: RegExp; + code: OriginCode; +} + +const ORIGIN_RULES: OriginRule[] = [ + { regex: /learn\.mongodb\.com/, code: "LEARN" }, + { regex: /mongodb\.com\/developer/, code: "DEVELOPER" }, + { regex: /mongodb\.com\/docs/, code: "DOCS" }, + { regex: /mongodb\.com\//, code: "DOTCOM" }, + { regex: /google-gemini-code-assist/, code: "GEMINI_CODE_ASSIST" }, + { regex: /vscode-mongodb-copilot/, code: "VSCODE" }, +]; + +function getOriginCode(origin: string): OriginCode { + for (const rule of ORIGIN_RULES) { + if (rule.regex.test(origin)) { + return rule.code; + } + } + return "OTHER"; +} + +const addOriginCodeToCustomData: AddCustomDataFunc = async (_, res) => { + const origin = res.locals.customData.origin; + return typeof origin === "string" && origin.length > 0 + ? { + originCode: getOriginCode(origin), + } + : undefined; +}; + +const addUserAgentToCustomData: AddCustomDataFunc = async (req) => + req.headers["user-agent"] + ? { + userAgent: req.headers["user-agent"], + } + : undefined; + +export type AddDefinedCustomDataFunc = ( + ...args: Parameters +) => Promise>; + +export const addDefaultCustomData: AddDefinedCustomDataFunc = async (req, res) => { + return { + ...(await addIpToCustomData(req, res)), + ...(await addOriginToCustomData(req, res)), + ...(await addOriginCodeToCustomData(req, res)), + ...(await addUserAgentToCustomData(req, res)), + }; +}; diff --git a/packages/mongodb-chatbot-server/src/processors/index.ts b/packages/mongodb-chatbot-server/src/processors/index.ts index 1f7975e67..399d44aed 100644 --- a/packages/mongodb-chatbot-server/src/processors/index.ts +++ b/packages/mongodb-chatbot-server/src/processors/index.ts @@ -9,3 +9,4 @@ export * from "./InputGuardrail"; export * from "./makeVerifiedAnswerGenerateResponse"; export * from "./includeChunksForMaxTokensPossible"; export * from "./GenerateResponse"; +export * from "./addCustomData"; diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 31fd333ac..4fecd6f56 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -1,9 +1,15 @@ -import { RequestHandler, Router } from "express"; +import { NextFunction, RequestHandler, Response, Router } from "express"; import { ParamsDictionary } from "express-serve-static-core"; import { FindContentFunc, MongoDbSearchResultsStore } from "mongodb-rag-core"; +import { ParsedQs } from "qs"; import validateRequestSchema from "../../middleware/validateRequestSchema"; import { SearchContentRequest, makeSearchContentRoute } from "./searchContent"; +import { requireRequestOrigin, requireValidIpAddress } from "../../middleware"; +import { AddCustomDataFunc, addDefaultCustomData, RequestCustomData } from "../../processors"; + + +export type SearchContentCustomData = RequestCustomData; /** Middleware to put in front of all the routes in the contentRouter. @@ -17,7 +23,7 @@ export type SearchContentMiddleware = RequestHandler< ParamsDictionary, unknown, unknown, - unknown, + ParsedQs, SearchContentRouterLocals >; @@ -30,20 +36,37 @@ export interface SearchContentRouterLocals { customData: Record; } +/** + Express.js Response from the app's {@link ConversationsService}. + */ +export type SearchContentRouterResponse = Response< + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any, + SearchContentRouterLocals +>; + export interface MakeContentRouterParams { findContent: FindContentFunc; searchResultsStore: MongoDbSearchResultsStore; - // TODO: Add default middleware along with customData as in conversationsRouter + addCustomData?: AddCustomDataFunc; middleware?: SearchContentMiddleware[]; } export function makeContentRouter({ findContent, searchResultsStore, - middleware = [], + addCustomData = addDefaultCustomData, + middleware = [requireValidIpAddress(), requireRequestOrigin()], }: MakeContentRouterParams) { const contentRouter = Router(); + // Set the customData and conversations on the response locals + // for use in subsequent middleware. + contentRouter.use(((_, res: Response, next: NextFunction) => { + res.locals.customData = {}; + next(); + }) satisfies RequestHandler); + // Add middleware to the conversationsRouter. middleware?.forEach((middleware) => contentRouter.use(middleware)); diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 7de4a2a89..496dc11e5 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -2,6 +2,7 @@ import { Request as ExpressRequest, Response as ExpressResponse, } from "express"; +import { ParamsDictionary } from "express-serve-static-core"; import { FindContentFunc, FindContentResult, @@ -14,7 +15,8 @@ import { z } from "zod"; import { SomeExpressRequest } from "../../middleware"; import { makeRequestError } from "../conversations/utils"; -import { SearchContentRouterLocals } from "./contentRouter"; +import { SearchContentCustomData, SearchContentRouterLocals } from "./contentRouter"; +import { AddCustomDataFunc } from "../../processors"; export const SearchContentRequestBody = z.object({ query: z.string(), @@ -37,6 +39,7 @@ export type SearchContentRequestBody = z.infer; export interface MakeSearchContentRouteParams { findContent: FindContentFunc; searchResultsStore: MongoDbSearchResultsStore; + addCustomData?: AddCustomDataFunc; } interface SearchContentResponseChunk { @@ -58,9 +61,10 @@ interface SearchContentResponseBody { export function makeSearchContentRoute({ findContent, searchResultsStore, + addCustomData }: MakeSearchContentRouteParams) { return async ( - req: ExpressRequest, + req: ExpressRequest, res: ExpressResponse ) => { try { @@ -71,6 +75,8 @@ export function makeSearchContentRoute({ limit, }); res.json(mapFindContentResultToSearchContentResponseChunk(results)); + + const customData = await getCustomData(req, res, addCustomData); await persistSearchResultsToDatabase({ query, results, @@ -137,3 +143,22 @@ async function persistSearchResultsToDatabase(params: { createdAt: new Date(), }); } + + +async function getCustomData( + req: ExpressRequest, + res: ExpressResponse, + addCustomData?: AddCustomDataFunc +): Promise { + try { + if (addCustomData) { + return await addCustomData(req, res); + } + } catch (error) { + throw makeRequestError({ + message: "Error parsing custom data from the request", + stack: (error as Error).stack, + httpStatus: 500, + }); + } +} diff --git a/packages/mongodb-chatbot-server/src/routes/conversations/addMessageToConversation.ts b/packages/mongodb-chatbot-server/src/routes/conversations/addMessageToConversation.ts index 24b150ffe..bf4b0b8f2 100644 --- a/packages/mongodb-chatbot-server/src/routes/conversations/addMessageToConversation.ts +++ b/packages/mongodb-chatbot-server/src/routes/conversations/addMessageToConversation.ts @@ -22,7 +22,6 @@ import { getRequestId, logRequest, sendErrorResponse } from "../../utils"; import { z } from "zod"; import { SomeExpressRequest } from "../../middleware/validateRequestSchema"; import { - AddCustomDataFunc, ConversationsRouterLocals, } from "./conversationsRouter"; import { wrapTraced, Logger } from "mongodb-rag-core/braintrust"; @@ -31,6 +30,7 @@ import { GenerateResponse, GenerateResponseParams, } from "../../processors/GenerateResponse"; +import { AddCustomDataFunc } from "../../processors"; export const DEFAULT_MAX_INPUT_LENGTH = 3000; // magic number for max input size for LLM export const DEFAULT_MAX_USER_MESSAGES_IN_CONVERSATION = 7; // magic number for max messages in a conversation diff --git a/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts b/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts index d98f1e6ce..65419a513 100644 --- a/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts @@ -32,6 +32,7 @@ import { import { UpdateTraceFunc } from "./UpdateTraceFunc"; import { GenerateResponse } from "../../processors/GenerateResponse"; import { Logger } from "mongodb-rag-core/braintrust"; +import { AddCustomDataFunc, addDefaultCustomData } from "../../processors"; /** Configuration for rate limiting on the /conversations/* routes. @@ -62,16 +63,6 @@ export interface ConversationsRateLimitConfig { addMessageSlowDownConfig?: SlowDownOptions; } -/** - Function to add custom data to the {@link Conversation} persisted to the database. - Has access to the Express.js request and response plus the {@link ConversationsRouterLocals} - from the {@link Response.locals} object. - */ -export type AddCustomDataFunc = ( - request: Request, - response: ConversationsRouterResponse -) => Promise; - /** Express.js Request that exposes the app's {@link ConversationsService}. @@ -193,95 +184,6 @@ export interface ConversationsRouterParams { braintrustLogger?: Logger; } -const addIpToCustomData: AddCustomDataFunc = async (req) => - req.ip - ? { - ip: req.ip, - } - : undefined; - -const addOriginToCustomData: AddCustomDataFunc = async (_, res) => - res.locals.customData.origin - ? { - origin: res.locals.customData.origin, - } - : undefined; - -export const originCodes = [ - "LEARN", - "DEVELOPER", - "DOCS", - "DOTCOM", - "GEMINI_CODE_ASSIST", - "VSCODE", - "OTHER", -] as const; - -export type OriginCode = (typeof originCodes)[number]; - -interface OriginRule { - regex: RegExp; - code: OriginCode; -} - -const ORIGIN_RULES: OriginRule[] = [ - { regex: /learn\.mongodb\.com/, code: "LEARN" }, - { regex: /mongodb\.com\/developer/, code: "DEVELOPER" }, - { regex: /mongodb\.com\/docs/, code: "DOCS" }, - { regex: /mongodb\.com\//, code: "DOTCOM" }, - { regex: /google-gemini-code-assist/, code: "GEMINI_CODE_ASSIST" }, - { regex: /vscode-mongodb-copilot/, code: "VSCODE" }, -]; - -function getOriginCode(origin: string): OriginCode { - for (const rule of ORIGIN_RULES) { - if (rule.regex.test(origin)) { - return rule.code; - } - } - return "OTHER"; -} - -const addOriginCodeToCustomData: AddCustomDataFunc = async (_, res) => { - const origin = res.locals.customData.origin; - return typeof origin === "string" && origin.length > 0 - ? { - originCode: getOriginCode(origin), - } - : undefined; -}; - -const addUserAgentToCustomData: AddCustomDataFunc = async (req) => - req.headers["user-agent"] - ? { - userAgent: req.headers["user-agent"], - } - : undefined; - -export type AddDefinedCustomDataFunc = ( - ...args: Parameters -) => Promise>; - -export const defaultCreateConversationCustomData: AddDefinedCustomDataFunc = - async (req, res) => { - return { - ...(await addIpToCustomData(req, res)), - ...(await addOriginToCustomData(req, res)), - ...(await addOriginCodeToCustomData(req, res)), - ...(await addUserAgentToCustomData(req, res)), - }; - }; - -export const defaultAddMessageToConversationCustomData: AddDefinedCustomDataFunc = - async (req, res) => { - return { - ...(await addIpToCustomData(req, res)), - ...(await addOriginToCustomData(req, res)), - ...(await addOriginCodeToCustomData(req, res)), - ...(await addUserAgentToCustomData(req, res)), - }; - }; - /** Constructor function to make the /conversations/* Express.js router. */ @@ -292,8 +194,8 @@ export function makeConversationsRouter({ maxUserMessagesInConversation, rateLimitConfig, middleware = [requireValidIpAddress(), requireRequestOrigin()], - createConversationCustomData = defaultCreateConversationCustomData, - addMessageToConversationCustomData = defaultAddMessageToConversationCustomData, + createConversationCustomData = addDefaultCustomData, + addMessageToConversationCustomData = addDefaultCustomData, addMessageToConversationUpdateTrace, rateMessageUpdateTrace, commentMessageUpdateTrace, diff --git a/packages/mongodb-chatbot-server/src/routes/conversations/createConversation.ts b/packages/mongodb-chatbot-server/src/routes/conversations/createConversation.ts index ef6aec2f5..4481af5d9 100644 --- a/packages/mongodb-chatbot-server/src/routes/conversations/createConversation.ts +++ b/packages/mongodb-chatbot-server/src/routes/conversations/createConversation.ts @@ -17,9 +17,9 @@ import { import { getRequestId, logRequest, sendErrorResponse } from "../../utils"; import { SomeExpressRequest } from "../../middleware/validateRequestSchema"; import { - AddCustomDataFunc, ConversationsRouterLocals, } from "./conversationsRouter"; +import { AddCustomDataFunc } from "../../processors"; export type CreateConversationRequest = z.infer< typeof CreateConversationRequest diff --git a/packages/mongodb-rag-core/src/conversations/ConversationsService.ts b/packages/mongodb-rag-core/src/conversations/ConversationsService.ts index 5bc97bfe5..afdaebe8b 100644 --- a/packages/mongodb-rag-core/src/conversations/ConversationsService.ts +++ b/packages/mongodb-rag-core/src/conversations/ConversationsService.ts @@ -4,6 +4,7 @@ import { References } from "../References"; import { WithScore } from "../VectorStore"; import { VerifiedAnswer } from "../verifiedAnswers"; import { OpenAI } from "openai"; +import { RequestCustomData } from "mongodb-chatbot-server/src/processors"; export type MessageBase = { /** @@ -147,7 +148,7 @@ export type DbMessage = SomeMessage & { */ export type Message = DbMessage; -export type ConversationCustomData = Record | undefined; +export type ConversationCustomData = RequestCustomData; /** Conversation between the user and the chatbot as stored in the database. From ff7559466a6d6fffebbd172fd62942cc5f7dd394 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 14:01:12 -0400 Subject: [PATCH 27/37] Clean --- packages/chatbot-server-mongodb-public/src/config.ts | 10 +++------- .../src/conversations/ConversationsService.ts | 3 +-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 3ee981c03..73e5d707a 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -16,9 +16,8 @@ import { requireRequestOrigin, AddCustomDataFunc, makeDefaultFindVerifiedAnswer, - defaultCreateConversationCustomData, - defaultAddMessageToConversationCustomData, makeVerifiedAnswerGenerateResponse, + addDefaultCustomData, } from "mongodb-chatbot-server"; import cookieParser from "cookie-parser"; import { blockGetRequests } from "./middleware/blockGetRequests"; @@ -269,7 +268,7 @@ export const generateResponse = wrapTraced( export const createConversationCustomDataWithAuthUser: AddCustomDataFunc = async (req, res) => { - const customData = await defaultCreateConversationCustomData(req, res); + const customData = await addDefaultCustomData(req, res); if (req.cookies.auth_user) { customData.authUser = req.cookies.auth_user; } @@ -332,10 +331,7 @@ export const config: AppConfig = { ? createConversationCustomDataWithAuthUser : undefined, addMessageToConversationCustomData: async (req, res) => { - const defaultCustomData = await defaultAddMessageToConversationCustomData( - req, - res - ); + const defaultCustomData = await addDefaultCustomData(req, res); const customData = { ...defaultCustomData, }; diff --git a/packages/mongodb-rag-core/src/conversations/ConversationsService.ts b/packages/mongodb-rag-core/src/conversations/ConversationsService.ts index afdaebe8b..5bc97bfe5 100644 --- a/packages/mongodb-rag-core/src/conversations/ConversationsService.ts +++ b/packages/mongodb-rag-core/src/conversations/ConversationsService.ts @@ -4,7 +4,6 @@ import { References } from "../References"; import { WithScore } from "../VectorStore"; import { VerifiedAnswer } from "../verifiedAnswers"; import { OpenAI } from "openai"; -import { RequestCustomData } from "mongodb-chatbot-server/src/processors"; export type MessageBase = { /** @@ -148,7 +147,7 @@ export type DbMessage = SomeMessage & { */ export type Message = DbMessage; -export type ConversationCustomData = RequestCustomData; +export type ConversationCustomData = Record | undefined; /** Conversation between the user and the chatbot as stored in the database. From 19ac7d499e5b74145c6811efce3cfcef09bcc661 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Fri, 11 Jul 2025 14:48:27 -0400 Subject: [PATCH 28/37] Remove unnecessary tests and comments --- .../findContentWithMongoDbMetadata.test.ts | 19 ++----- .../src/routes/content/contentRouter.test.ts | 52 ------------------- .../MongoDbVerifiedAnswerStore.ts | 1 - 3 files changed, 4 insertions(+), 68 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts index 2c89375fb..547254a6f 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.test.ts @@ -15,16 +15,7 @@ jest.mock("mongodb-rag-core", () => { }; }); -jest.mock("mongodb-rag-core/braintrust", () => { - const actual = jest.requireActual("mongodb-rag-core/braintrust"); - return { - ...actual, - wrapTraced: jest.fn(), - }; -}); - import { FindContentFunc, updateFrontMatter } from "mongodb-rag-core"; -import { wrapTraced } from "mongodb-rag-core/braintrust"; import { makeFindContentWithMongoDbMetadata, } from "./findContentWithMongoDbMetadata"; @@ -34,7 +25,6 @@ import { classifyMongoDbProgrammingLanguageAndProduct } from "mongodb-rag-core/m const mockedClassify = classifyMongoDbProgrammingLanguageAndProduct as jest.Mock; const mockedUpdateFrontMatter = updateFrontMatter as jest.Mock; -const mockedWrapTraced = wrapTraced as jest.Mock; function makeMockFindContent(result: string[]): FindContentFunc { return jest.fn().mockResolvedValue(result); @@ -48,10 +38,10 @@ describe("makeFindContentWithMongoDbMetadata", () => { test("enhances query with front matter and classification", async () => { const inputQuery = "How do I use MongoDB with TypeScript?"; const expectedQuery = `--- - product: driver - programmingLanguage: typescript - --- - How do I use MongoDB with TypeScript?`; +product: driver +programmingLanguage: typescript +--- +How do I use MongoDB with TypeScript?`; const fakeResult = ["doc1", "doc2"]; mockedClassify.mockResolvedValue({ @@ -59,7 +49,6 @@ describe("makeFindContentWithMongoDbMetadata", () => { programmingLanguage: "typescript", }); mockedUpdateFrontMatter.mockReturnValue(expectedQuery); - mockedWrapTraced.mockImplementation((fn) => fn); const findContentMock = makeMockFindContent(fakeResult); diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index c9c984dbf..306b6bdca 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -37,37 +37,6 @@ function makeContentRouterConfig( describe("contentRouter", () => { const searchEndpoint = "/api/v1/content/search"; - it("should return search results for a valid request", async () => { - const { app, origin } = await makeTestApp({ - contentRouterConfig: makeContentRouterConfig(), - }); - const res = await request(app) - .post(searchEndpoint) - .set("req-id", "test-req-id") - .set("Origin", origin) - .send({ - query: "mongodb", - limit: 2, - }); - expect(res.status).toBe(200); - expect(res.body).toHaveProperty("results"); - expect(Array.isArray(res.body.results)).toBe(true); - }); - - it("should return 400 for missing query field", async () => { - const { app, origin } = await makeTestApp({ - contentRouterConfig: makeContentRouterConfig(), - }); - const res = await request(app) - .post(searchEndpoint) - .set("req-id", "test-req-id") - .set("Origin", origin) - .send({}); - - expect(res.body).toHaveProperty("error"); - expect(res.body.error).toBe("Invalid request"); - }); - it("should call custom middleware if provided", async () => { const mockMiddleware = jest.fn((_req, _res, next) => next()); const { app, origin } = await makeTestApp({ @@ -82,25 +51,4 @@ describe("contentRouter", () => { .send({ query: "mongodb" }); expect(mockMiddleware).toHaveBeenCalled(); }); - - it("should pass the 'limit' parameter to findContent", async () => { - const { app, origin } = await makeTestApp({ - contentRouterConfig: makeContentRouterConfig(), - }); - const limit = 1; - const res = await request(app) - .post(searchEndpoint) - .set("req-id", "test-req-id") - .set("Origin", origin) - .send({ - query: "mongodb", - limit, - }); - expect(res.status).toBe(200); - expect(res.body).toHaveProperty("results"); - expect(Array.isArray(res.body.results)).toBe(true); - expect(findContentMock).toHaveBeenCalledWith( - expect.objectContaining({ limit }) - ); - }); }); diff --git a/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts b/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts index 05af59a4d..53f464da2 100644 --- a/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts +++ b/packages/mongodb-rag-core/src/verifiedAnswers/MongoDbVerifiedAnswerStore.ts @@ -51,7 +51,6 @@ export function makeMongoDbVerifiedAnswerStore({ path, limit: k, numCandidates: numCandidates ?? k * 15, - // Do I need to use handleFilters(filter) here? Now that filter could have arrays of strings? filter, }, }, From cadae06392130e1c295e776e513ad8d6d0549dfd Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 14 Jul 2025 11:12:59 -0400 Subject: [PATCH 29/37] Added custom middleware to contentRouter, used in searchContent route, added to tests --- .../src/config.ts | 1 + .../src/routes/content/contentRouter.test.ts | 57 +++++++++++++++---- .../src/routes/content/contentRouter.ts | 2 +- .../src/routes/content/searchContent.test.ts | 2 +- .../src/routes/content/searchContent.ts | 1 + 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 73e5d707a..732b4c4b3 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -317,6 +317,7 @@ export const config: AppConfig = { classifierModel: languageModel, }), searchResultsStore, + middleware: [requireValidIpAddress(), requireRequestOrigin()], }, conversationsRouterConfig: { middleware: [ diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index 306b6bdca..9eb85774c 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -1,10 +1,11 @@ +import { Express } from "express"; import request from "supertest"; -import { makeTestApp } from "../../test/testHelpers"; -import type { MakeContentRouterParams } from "./contentRouter"; import type { FindContentFunc, MongoDbSearchResultsStore, } from "mongodb-rag-core"; +import type { MakeContentRouterParams, SearchContentMiddleware } from "./contentRouter"; +import { makeTestApp } from "../../test/testHelpers"; // Minimal in-memory mock for SearchResultsStore for testing purposes const mockSearchResultsStore: MongoDbSearchResultsStore = { @@ -23,8 +24,7 @@ const findContentMock = jest.fn().mockResolvedValue({ queryEmbedding: [], }) satisfies FindContentFunc; -// Helper to build contentRouterConfig for the test app -function makeContentRouterConfig( +function makeMockContentRouterConfig( overrides: Partial = {} ) { return { @@ -35,20 +35,57 @@ function makeContentRouterConfig( } describe("contentRouter", () => { + const ipAddress = "127.0.0.1"; const searchEndpoint = "/api/v1/content/search"; it("should call custom middleware if provided", async () => { const mockMiddleware = jest.fn((_req, _res, next) => next()); const { app, origin } = await makeTestApp({ - contentRouterConfig: makeContentRouterConfig({ + contentRouterConfig: makeMockContentRouterConfig({ middleware: [mockMiddleware], }), }); - await request(app) - .post(searchEndpoint) - .set("req-id", "test-req-id") - .set("Origin", origin) - .send({ query: "mongodb" }); + await createContentReq({ app, origin, query: 'mongodb'}); expect(mockMiddleware).toHaveBeenCalled(); }); + + test("should use route middleware customData", async () => { + const middleware1: SearchContentMiddleware = (_, res, next) => { + res.locals.customData.middleware1 = true; + next(); + }; + let called = false; + const middleware2: SearchContentMiddleware = (_, res, next) => { + expect(res.locals.customData.middleware1).toBe(true); + called = true; + next(); + }; + const { app, origin } = await makeTestApp({ + contentRouterConfig: makeMockContentRouterConfig({ + middleware: [middleware1, middleware2], + }) + }); + await createContentReq({ app, origin, query: 'What is aggregation?' }); + expect(called).toBe(true); + }); + + /** + Helper function to create a new content request + */ + async function createContentReq({ + app, + origin, + query, + }: { + app: Express; + origin: string; + query: string; + }) { + const createContentRes = await request(app) + .post(searchEndpoint) + .set("X-FORWARDED-FOR", ipAddress) + .set("Origin", origin) + .send({ query }); + return createContentRes; + } }); diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index 4fecd6f56..d8e807fdb 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -8,7 +8,6 @@ import { SearchContentRequest, makeSearchContentRoute } from "./searchContent"; import { requireRequestOrigin, requireValidIpAddress } from "../../middleware"; import { AddCustomDataFunc, addDefaultCustomData, RequestCustomData } from "../../processors"; - export type SearchContentCustomData = RequestCustomData; /** @@ -77,6 +76,7 @@ export function makeContentRouter({ makeSearchContentRoute({ findContent, searchResultsStore, + addCustomData, }) ); diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts index b2ca79963..a7487830d 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.test.ts @@ -9,7 +9,7 @@ function makeMockFindContent(result: FindContentResult) { } // Helper to create a mock MongoDbSearchResultsStore -function makeMockMongoDbSearchResultsStore() { +export function makeMockMongoDbSearchResultsStore() { return { drop: jest.fn(), close: jest.fn(), diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 496dc11e5..eb40076f7 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -77,6 +77,7 @@ export function makeSearchContentRoute({ res.json(mapFindContentResultToSearchContentResponseChunk(results)); const customData = await getCustomData(req, res, addCustomData); + // TODO: Save to db?? await persistSearchResultsToDatabase({ query, results, From 25e2c5ca1b97fcb22bdb9499e3694e05a5c33ed1 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 14 Jul 2025 12:59:55 -0400 Subject: [PATCH 30/37] Add customData to db... --- .../src/routes/content/searchContent.ts | 16 +++++++++------- .../contentStore/MongoDbSearchResultsStore.ts | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index eb40076f7..a6ced9f2e 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -77,13 +77,13 @@ export function makeSearchContentRoute({ res.json(mapFindContentResultToSearchContentResponseChunk(results)); const customData = await getCustomData(req, res, addCustomData); - // TODO: Save to db?? await persistSearchResultsToDatabase({ query, results, dataSources, limit, searchResultsStore, + ...(customData !== undefined && { customData }), }); } catch (error) { throw makeRequestError({ @@ -129,19 +129,21 @@ function mapDataSourcesToFilters( }; } -async function persistSearchResultsToDatabase(params: { +async function persistSearchResultsToDatabase({ query, results, dataSources, limit, searchResultsStore, customData } : { query: string; results: FindContentResult; dataSources: SearchRecordDataSource[]; limit: number; searchResultsStore: MongoDbSearchResultsStore; + customData?: { [k:string]: unknown; }; }) { - params.searchResultsStore.saveSearchResult({ - query: params.query, - results: params.results.content, - dataSources: params.dataSources, - limit: params.limit, + searchResultsStore.saveSearchResult({ + query, + results: results.content, + dataSources, + limit, createdAt: new Date(), + ...(customData !== undefined && { customData }), }); } diff --git a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts index f9fa77c79..df588ab70 100644 --- a/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts +++ b/packages/mongodb-rag-core/src/contentStore/MongoDbSearchResultsStore.ts @@ -47,6 +47,7 @@ export const SearchResultRecordSchema = z.object({ dataSources: z.array(SearchRecordDataSourceSchema).optional(), limit: z.number().optional(), createdAt: z.date(), + customData: z.object({}).passthrough().optional(), }); export interface SearchResultRecord { @@ -55,6 +56,7 @@ export interface SearchResultRecord { dataSources?: SearchRecordDataSource[]; limit?: number; createdAt: Date; + customData?: Record; } export type MongoDbSearchResultsStore = DatabaseConnection & { From db98f567cfc928dd8d9342a337b2db69ff5c89a9 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 14 Jul 2025 15:38:49 -0400 Subject: [PATCH 31/37] Clean: allow undefined customData value --- .../mongodb-chatbot-server/src/routes/content/searchContent.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index a6ced9f2e..262d75b63 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -83,7 +83,7 @@ export function makeSearchContentRoute({ dataSources, limit, searchResultsStore, - ...(customData !== undefined && { customData }), + customData, }); } catch (error) { throw makeRequestError({ From d435d8fb864894501da019d03c36a42e7940866b Mon Sep 17 00:00:00 2001 From: mmeigs Date: Mon, 14 Jul 2025 16:18:30 -0400 Subject: [PATCH 32/37] Alter types for createConversationsMiddlewareReq --- .../mongodb-chatbot-server/src/test/middlewareTestHelpers.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/mongodb-chatbot-server/src/test/middlewareTestHelpers.ts b/packages/mongodb-chatbot-server/src/test/middlewareTestHelpers.ts index 44cedc8fb..7eb9d7b0e 100644 --- a/packages/mongodb-chatbot-server/src/test/middlewareTestHelpers.ts +++ b/packages/mongodb-chatbot-server/src/test/middlewareTestHelpers.ts @@ -2,6 +2,7 @@ import { Request } from "express"; import { ParamsDictionary } from "express-serve-static-core"; import { createRequest, createResponse } from "node-mocks-http"; import { ConversationsService } from "mongodb-rag-core"; +import { ParsedQs } from "qs"; import { ConversationsRouterLocals, ConversationsRouterResponse, @@ -13,7 +14,7 @@ export const createConversationsMiddlewareReq = () => ParamsDictionary, unknown, unknown, - unknown, + ParsedQs, ConversationsRouterLocals > >(); From 3a5852074de03c703ff818bdd6b57f41341eae08 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Tue, 15 Jul 2025 14:47:50 -0400 Subject: [PATCH 33/37] Rerun tests From 2812d4ef2826ab7d49a9eb10e03df9b8c44cb799 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 16 Jul 2025 10:57:16 -0400 Subject: [PATCH 34/37] PR feedback --- .../src/middleware/validateRequestSchema.ts | 4 ++-- .../src/processors/addCustomData.ts | 6 +++--- .../src/routes/content/searchContent.ts | 20 +++++++++++++------ 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/packages/mongodb-chatbot-server/src/middleware/validateRequestSchema.ts b/packages/mongodb-chatbot-server/src/middleware/validateRequestSchema.ts index 0fd952675..608cf6612 100644 --- a/packages/mongodb-chatbot-server/src/middleware/validateRequestSchema.ts +++ b/packages/mongodb-chatbot-server/src/middleware/validateRequestSchema.ts @@ -5,12 +5,12 @@ import { getRequestId, logRequest, sendErrorResponse } from "../utils"; export const SomeExpressRequest = z.object({ headers: z.object({}).optional(), - params: z.object({}).optional(), + params: z.object({}), query: z.object({}).optional(), body: z.object({}).optional(), }); -function generateZodErrorMessage(error: ZodError) { +export function generateZodErrorMessage(error: ZodError) { return generateErrorMessage(error.issues, { delimiter: { error: "\n", diff --git a/packages/mongodb-chatbot-server/src/processors/addCustomData.ts b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts index b8f88e802..e8de83e6b 100644 --- a/packages/mongodb-chatbot-server/src/processors/addCustomData.ts +++ b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts @@ -7,9 +7,9 @@ export type RequestCustomData = Record | undefined; Has access to the Express.js request and response plus the values from the {@link Response.locals} object. */ -export type AddCustomDataFunc = ( - request: Req, - response: Res +export type AddCustomDataFunc = ( + request: Request, + response: Response ) => Promise; const addIpToCustomData: AddCustomDataFunc = async (req) => diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 262d75b63..1e7946f63 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -13,7 +13,7 @@ import { } from "mongodb-rag-core"; import { z } from "zod"; -import { SomeExpressRequest } from "../../middleware"; +import { generateZodErrorMessage, SomeExpressRequest } from "../../middleware"; import { makeRequestError } from "../conversations/utils"; import { SearchContentCustomData, SearchContentRouterLocals } from "./contentRouter"; import { AddCustomDataFunc } from "../../processors"; @@ -64,10 +64,19 @@ export function makeSearchContentRoute({ addCustomData }: MakeSearchContentRouteParams) { return async ( - req: ExpressRequest, + req: ExpressRequest, res: ExpressResponse ) => { try { + // --- INPUT VALIDATION --- + const { error } = SearchContentRequestBody.safeParse(req.body); + if (error) { + throw makeRequestError({ + httpStatus: 500, + message: generateZodErrorMessage(error), + }); + } + const { query, dataSources, limit } = req.body; const results = await findContent({ query, @@ -75,7 +84,7 @@ export function makeSearchContentRoute({ limit, }); res.json(mapFindContentResultToSearchContentResponseChunk(results)); - + const customData = await getCustomData(req, res, addCustomData); await persistSearchResultsToDatabase({ query, @@ -149,7 +158,7 @@ async function persistSearchResultsToDatabase({ query, results, dataSources, lim async function getCustomData( - req: ExpressRequest, + req: ExpressRequest, res: ExpressResponse, addCustomData?: AddCustomDataFunc ): Promise { @@ -159,9 +168,8 @@ async function getCustomData( } } catch (error) { throw makeRequestError({ - message: "Error parsing custom data from the request", - stack: (error as Error).stack, httpStatus: 500, + message: "Error parsing custom data from the request", }); } } From 9e9c44dfb98c2b82b441af6d3927f596dee84719 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 16 Jul 2025 11:25:58 -0400 Subject: [PATCH 35/37] Add Locals types to middleware invocations --- packages/chatbot-server-mongodb-public/src/config.ts | 8 +++++--- .../src/middleware/requireValidIpAddress.test.ts | 7 ++++--- .../src/routes/content/contentRouter.ts | 2 +- .../src/routes/conversations/conversationsRouter.ts | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/config.ts b/packages/chatbot-server-mongodb-public/src/config.ts index 732b4c4b3..3d2d02cf2 100644 --- a/packages/chatbot-server-mongodb-public/src/config.ts +++ b/packages/chatbot-server-mongodb-public/src/config.ts @@ -18,6 +18,8 @@ import { makeDefaultFindVerifiedAnswer, makeVerifiedAnswerGenerateResponse, addDefaultCustomData, + ConversationsRouterLocals, + SearchContentRouterLocals, } from "mongodb-chatbot-server"; import cookieParser from "cookie-parser"; import { blockGetRequests } from "./middleware/blockGetRequests"; @@ -317,13 +319,13 @@ export const config: AppConfig = { classifierModel: languageModel, }), searchResultsStore, - middleware: [requireValidIpAddress(), requireRequestOrigin()], + middleware: [requireValidIpAddress(), requireRequestOrigin()], }, conversationsRouterConfig: { middleware: [ blockGetRequests, - requireValidIpAddress(), - requireRequestOrigin(), + requireValidIpAddress(), + requireRequestOrigin(), useSegmentIds(), redactConnectionUri(), cookieParser(), diff --git a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.test.ts b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.test.ts index 6f946c24a..8bb027e1e 100644 --- a/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.test.ts +++ b/packages/mongodb-chatbot-server/src/middleware/requireValidIpAddress.test.ts @@ -3,6 +3,7 @@ import { createConversationsMiddlewareReq, createConversationsMiddlewareRes, } from "../test/middlewareTestHelpers"; +import { ConversationsRouterLocals } from "../routes"; const baseReq = { body: { message: "Hello, world!" }, @@ -18,7 +19,7 @@ describe("requireValidIpAddress", () => { const res = createConversationsMiddlewareRes(); const next = jest.fn(); - const middleware = requireValidIpAddress(); + const middleware = requireValidIpAddress(); req.body = baseReq.body; req.params = baseReq.params; req.query = baseReq.query; @@ -39,7 +40,7 @@ describe("requireValidIpAddress", () => { const next = jest.fn(); const invalidIpAddress = "not-an-ip-address"; - const middleware = requireValidIpAddress(); + const middleware = requireValidIpAddress(); req.body = baseReq.body; req.params = baseReq.params; req.query = baseReq.query; @@ -59,7 +60,7 @@ describe("requireValidIpAddress", () => { const res = createConversationsMiddlewareRes(); const next = jest.fn(); - const middleware = requireValidIpAddress(); + const middleware = requireValidIpAddress(); req.body = baseReq.body; req.params = baseReq.params; req.query = baseReq.query; diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index d8e807fdb..f00fc9ec1 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -55,7 +55,7 @@ export function makeContentRouter({ findContent, searchResultsStore, addCustomData = addDefaultCustomData, - middleware = [requireValidIpAddress(), requireRequestOrigin()], + middleware = [requireValidIpAddress(), requireRequestOrigin()], }: MakeContentRouterParams) { const contentRouter = Router(); diff --git a/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts b/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts index 65419a513..892432a79 100644 --- a/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/conversations/conversationsRouter.ts @@ -193,7 +193,7 @@ export function makeConversationsRouter({ maxInputLengthCharacters, maxUserMessagesInConversation, rateLimitConfig, - middleware = [requireValidIpAddress(), requireRequestOrigin()], + middleware = [requireValidIpAddress(), requireRequestOrigin()], createConversationCustomData = addDefaultCustomData, addMessageToConversationCustomData = addDefaultCustomData, addMessageToConversationUpdateTrace, From 4db756787fdbb0583a6cd33ee04dce8d7be6fd12 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Wed, 16 Jul 2025 15:48:51 -0400 Subject: [PATCH 36/37] Lint, fix trace name, remove unnecessary import --- .../findContentWithMongoDbMetadata.ts | 2 +- .../src/processors/addCustomData.ts | 5 ++++- .../src/routes/content/contentRouter.test.ts | 11 ++++++---- .../src/routes/content/contentRouter.ts | 11 ++++++++-- .../src/routes/content/searchContent.ts | 20 +++++++++++++------ 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts index a08d9f21f..854038b57 100644 --- a/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts +++ b/packages/chatbot-server-mongodb-public/src/processors/findContentWithMongoDbMetadata.ts @@ -31,7 +31,7 @@ export const makeFindContentWithMongoDbMetadata = ({ return res; }, { - name: "makeFindContentWithMongoDbMetadata", + name: "findContentWithMongoDbMetadata", } ); return wrappedFindContent; diff --git a/packages/mongodb-chatbot-server/src/processors/addCustomData.ts b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts index e8de83e6b..6b66f194b 100644 --- a/packages/mongodb-chatbot-server/src/processors/addCustomData.ts +++ b/packages/mongodb-chatbot-server/src/processors/addCustomData.ts @@ -81,7 +81,10 @@ export type AddDefinedCustomDataFunc = ( ...args: Parameters ) => Promise>; -export const addDefaultCustomData: AddDefinedCustomDataFunc = async (req, res) => { +export const addDefaultCustomData: AddDefinedCustomDataFunc = async ( + req, + res +) => { return { ...(await addIpToCustomData(req, res)), ...(await addOriginToCustomData(req, res)), diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts index 9eb85774c..0dd7f8a27 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.test.ts @@ -4,7 +4,10 @@ import type { FindContentFunc, MongoDbSearchResultsStore, } from "mongodb-rag-core"; -import type { MakeContentRouterParams, SearchContentMiddleware } from "./contentRouter"; +import type { + MakeContentRouterParams, + SearchContentMiddleware, +} from "./contentRouter"; import { makeTestApp } from "../../test/testHelpers"; // Minimal in-memory mock for SearchResultsStore for testing purposes @@ -45,7 +48,7 @@ describe("contentRouter", () => { middleware: [mockMiddleware], }), }); - await createContentReq({ app, origin, query: 'mongodb'}); + await createContentReq({ app, origin, query: "mongodb" }); expect(mockMiddleware).toHaveBeenCalled(); }); @@ -63,9 +66,9 @@ describe("contentRouter", () => { const { app, origin } = await makeTestApp({ contentRouterConfig: makeMockContentRouterConfig({ middleware: [middleware1, middleware2], - }) + }), }); - await createContentReq({ app, origin, query: 'What is aggregation?' }); + await createContentReq({ app, origin, query: "What is aggregation?" }); expect(called).toBe(true); }); diff --git a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts index f00fc9ec1..fe40a3241 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/contentRouter.ts @@ -6,7 +6,11 @@ import { ParsedQs } from "qs"; import validateRequestSchema from "../../middleware/validateRequestSchema"; import { SearchContentRequest, makeSearchContentRoute } from "./searchContent"; import { requireRequestOrigin, requireValidIpAddress } from "../../middleware"; -import { AddCustomDataFunc, addDefaultCustomData, RequestCustomData } from "../../processors"; +import { + AddCustomDataFunc, + addDefaultCustomData, + RequestCustomData, +} from "../../processors"; export type SearchContentCustomData = RequestCustomData; @@ -55,7 +59,10 @@ export function makeContentRouter({ findContent, searchResultsStore, addCustomData = addDefaultCustomData, - middleware = [requireValidIpAddress(), requireRequestOrigin()], + middleware = [ + requireValidIpAddress(), + requireRequestOrigin(), + ], }: MakeContentRouterParams) { const contentRouter = Router(); diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 1e7946f63..0809f1701 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -2,7 +2,6 @@ import { Request as ExpressRequest, Response as ExpressResponse, } from "express"; -import { ParamsDictionary } from "express-serve-static-core"; import { FindContentFunc, FindContentResult, @@ -15,7 +14,10 @@ import { z } from "zod"; import { generateZodErrorMessage, SomeExpressRequest } from "../../middleware"; import { makeRequestError } from "../conversations/utils"; -import { SearchContentCustomData, SearchContentRouterLocals } from "./contentRouter"; +import { + SearchContentCustomData, + SearchContentRouterLocals, +} from "./contentRouter"; import { AddCustomDataFunc } from "../../processors"; export const SearchContentRequestBody = z.object({ @@ -61,7 +63,7 @@ interface SearchContentResponseBody { export function makeSearchContentRoute({ findContent, searchResultsStore, - addCustomData + addCustomData, }: MakeSearchContentRouteParams) { return async ( req: ExpressRequest, @@ -138,13 +140,20 @@ function mapDataSourcesToFilters( }; } -async function persistSearchResultsToDatabase({ query, results, dataSources, limit, searchResultsStore, customData } : { +async function persistSearchResultsToDatabase({ + query, + results, + dataSources, + limit, + searchResultsStore, + customData, +}: { query: string; results: FindContentResult; dataSources: SearchRecordDataSource[]; limit: number; searchResultsStore: MongoDbSearchResultsStore; - customData?: { [k:string]: unknown; }; + customData?: { [k: string]: unknown }; }) { searchResultsStore.saveSearchResult({ query, @@ -156,7 +165,6 @@ async function persistSearchResultsToDatabase({ query, results, dataSources, lim }); } - async function getCustomData( req: ExpressRequest, res: ExpressResponse, From 690b895d4c38b00cdc5bd7832ae5faf034b189f1 Mon Sep 17 00:00:00 2001 From: mmeigs Date: Thu, 17 Jul 2025 11:23:22 -0400 Subject: [PATCH 37/37] (EAI-972) Add extra braintrust tracing to searchContent route (#822) Add extra braintrust tracing to searchContent route --- .../src/routes/content/searchContent.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts index 0809f1701..bdfbbc6df 100644 --- a/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts +++ b/packages/mongodb-chatbot-server/src/routes/content/searchContent.ts @@ -19,6 +19,7 @@ import { SearchContentRouterLocals, } from "./contentRouter"; import { AddCustomDataFunc } from "../../processors"; +import { wrapTraced } from "mongodb-rag-core/braintrust"; export const SearchContentRequestBody = z.object({ query: z.string(), @@ -65,6 +66,7 @@ export function makeSearchContentRoute({ searchResultsStore, addCustomData, }: MakeSearchContentRouteParams) { + const tracedFindContent = wrapTraced(findContent, { name: "searchContent" }); return async ( req: ExpressRequest, res: ExpressResponse @@ -80,7 +82,7 @@ export function makeSearchContentRoute({ } const { query, dataSources, limit } = req.body; - const results = await findContent({ + const results = await tracedFindContent({ query, filters: mapDataSourcesToFilters(dataSources), limit,