diff --git a/.github/workflows/publish-production.yml b/.github/workflows/publish-production.yml
index 1ab4240be234953..a2a12c1229ebff4 100644
--- a/.github/workflows/publish-production.yml
+++ b/.github/workflows/publish-production.yml
@@ -41,6 +41,15 @@ jobs:
cd distmd && zip -r markdown.zip .
npx wrangler r2 object put vendored-markdown/markdown.zip --file=markdown.zip --remote
rm markdown.zip
+
+ # The shell is still inside distmd/ after the cd above, and distllms/ is
+ # its sibling, so step back up before entering it.
+ cd ../distllms
+ # find prints paths as ./a/b; strip the leading ./ so the object key is the
+ # bare relative path (e.g. workers/llms-full.txt) that the Worker looks up.
+ for file in $(find . -type f); do
+ npx wrangler r2 object put "vendored-markdown/${file#./}" --file="$file" --remote
+ done
- name: Upload vendored Markdown files to ZT DevDocs bucket
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ZT_DEVDOCS_ACCESS_KEY_ID }}
diff --git a/.gitignore b/.gitignore
index e3de49811f983d3..501cf6fc5247c53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
# build output
dist/
distmd/
+distllms/
# generated types
.astro/
diff --git a/bin/generate-index-md.ts b/bin/generate-index-md.ts
index 276a4e9b337fe70..9723feb9064d6a2 100644
--- a/bin/generate-index-md.ts
+++ b/bin/generate-index-md.ts
@@ -1,12 +1,33 @@
-import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import {
+ readFileSync,
+ writeFileSync,
+ mkdirSync,
+ appendFileSync,
+ rmSync,
+} from "node:fs";
import glob from "fast-glob";
import { parse } from "node-html-parser";
import { htmlToMarkdown } from "~/util/markdown";
+import YAML from "yaml";
+
+// The llms-full.txt bundles are built with appendFileSync, so output left over
+// from an earlier run has to be removed or its pages would be duplicated.
+rmSync("distllms", { recursive: true, force: true });
+
+// Product directory name -> slugified product group. undefined is cached too
+// (no product file, no group, or a failed lookup) so each product is read once.
+const productGroups = new Map<string, string | undefined>();
+
const files = await glob("dist/**/*.html");
for (const file of files) {
+ // Skip the site root page and the 404 page.
+ if (file === "dist/index.html" || file === "dist/404.html") {
+ continue;
+ }
+
const html = readFileSync(file, "utf-8");
const dom = parse(html);
@@ -24,9 +45,46 @@ for (const file of files) {
continue;
}
+ const product = file.split("/")[1];
const path = file.replace("dist/", "distmd/").replace(".html", ".md");
mkdirSync(path.split("/").slice(0, -1).join("/"), { recursive: true });
-
writeFileSync(path, markdown);
+
+ const llmsFullContent = ["", markdown, "\n\n"].join("\n");
+
+ // Every page goes into the site-wide bundle and into its product's bundle.
+ mkdirSync(`distllms/${product}`, { recursive: true });
+ appendFileSync("distllms/llms-full.txt", llmsFullContent);
+ appendFileSync(`distllms/${product}/llms-full.txt`, llmsFullContent);
+
+ // Resolve the product's group on first sight; later pages reuse the result.
+ if (!productGroups.has(product)) {
+ let resolved: string | undefined;
+
+ try {
+ const [productFile] = await glob(`src/content/products/${product}.*`);
+
+ if (productFile) {
+ const yaml = YAML.parse(readFileSync(productFile, "utf-8"));
+ resolved = yaml.product?.group?.replaceAll(" ", "-").toLowerCase();
+ }
+ } catch (error) {
+ console.error(
+ `Failed to find a product group for ${product}:`,
+ error instanceof Error ? error.message : error,
+ );
+ }
+
+ productGroups.set(product, resolved);
+ }
+
+ const group = productGroups.get(product);
+
+ if (!group) {
+ continue;
+ }
+
+ mkdirSync(`distllms/${group}`, { recursive: true });
+ appendFileSync(`distllms/${group}/llms-full.txt`, llmsFullContent);
}
diff --git a/src/pages/[area]/llms-full.txt.ts b/src/pages/[area]/llms-full.txt.ts
deleted file mode 100644
index 62d5a374ee093f8..000000000000000
--- a/src/pages/[area]/llms-full.txt.ts
+++ /dev/null
@@ -1,62 +0,0 @@
-import type { APIRoute } from "astro";
-import { getCollection } from "astro:content";
-import { slug } from "github-slugger";
-
-export async function getStaticPaths() {
- const products = await getCollection("products");
-
- const areas = new Set(
- products.flatMap((p) => {
- if (!p.data.product.group) return [];
-
- return slug(p.data.product.group.toLowerCase());
- }),
- );
-
- return [...areas].map((area) => {
- return {
- params: {
- area,
- },
- };
- });
-}
-
-export const GET: APIRoute = async ({ params }) => {
- const products = await getCollection("products", (p) => {
- if (!p.data.product.group) return false;
-
- return slug(p.data.product.group.toLowerCase()) === params.area;
- });
-
- const markdown = await getCollection("docs", (e) => {
- if (!e.body) return false;
-
- if (
- e.id === "warp-client/legal/3rdparty" ||
- e.id === "magic-wan/legal/3rdparty"
- )
- return false;
-
- return products.some((p) =>
- e.id.startsWith(p.data.product.url.slice(1, -1)),
- );
- })
- .then((entries) =>
- entries.map((entry) => {
- return [
- `# ${entry.data.title}`,
- `URL: https://developers.cloudflare.com/${entry.id}/`,
- `${entry.body?.trim()}`,
- "---",
- ].join("\n\n");
- }),
- )
- .then((array) => array.join("\n\n"));
-
- return new Response(markdown, {
- headers: {
- "content-type": "text/plain",
- },
- });
-};
diff --git a/src/pages/[product]/llms-full.txt.ts b/src/pages/[product]/llms-full.txt.ts
deleted file mode 100644
index 057b9f6f4544da4..000000000000000
--- a/src/pages/[product]/llms-full.txt.ts
+++ /dev/null
@@ -1,50 +0,0 @@
-import type { APIRoute } from "astro";
-import { getCollection } from "astro:content";
-
-export async function getStaticPaths() {
- const products = await getCollection("products", (p) => {
- return p.data.product.group;
- });
-
- return products.map((entry) => {
- return {
- params: {
- product: entry.id,
- },
- props: {
- product: entry,
- },
- };
- });
-}
-
-export const GET: APIRoute = async ({ props }) => {
- const markdown = await getCollection("docs", (e) => {
- if (
- e.id === "warp-client/legal/3rdparty" ||
- e.id === "magic-wan/legal/3rdparty"
- )
- return false;
-
- return (
- e.id.startsWith(props.product.data.product.url.slice(1, -1)) && e.body
- );
- })
- .then((entries) =>
- entries.map((entry) => {
- return [
- `# ${entry.data.title}`,
- `URL: https://developers.cloudflare.com/${entry.id}/`,
- `${entry.body?.trim()}`,
- "---",
- ].join("\n\n");
- }),
- )
- .then((array) => array.join("\n\n"));
-
- return new Response(markdown, {
- headers: {
- "content-type": "text/plain",
- },
- });
-};
diff --git a/src/pages/llms-full.txt.ts b/src/pages/llms-full.txt.ts
deleted file mode 100644
index 12fcd84f7c91a67..000000000000000
--- a/src/pages/llms-full.txt.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-import type { APIRoute } from "astro";
-import { getCollection } from "astro:content";
-
-export const GET: APIRoute = async () => {
- const markdown = await getCollection("docs", (e) => {
- if (!e.body) return false;
-
- if (
- e.id === "warp-client/legal/3rdparty" ||
- e.id === "magic-wan/legal/3rdparty"
- )
- return false;
-
- return true;
- })
- .then((entries) =>
- entries.map((entry) => {
- return [
- `# ${entry.data.title}`,
- `URL: https://developers.cloudflare.com/${entry.id}/`,
- `${entry.body?.trim()}`,
- "---",
- ].join("\n\n");
- }),
- )
- .then((array) => array.join("\n\n"));
-
- return new Response(markdown, {
- headers: {
- "content-type": "text/plain",
- },
- });
-};
diff --git a/worker/index.ts b/worker/index.ts
index 728b316593dd2c7..1e05a69b420ab46 100644
--- a/worker/index.ts
+++ b/worker/index.ts
@@ -22,6 +22,24 @@ export default class extends WorkerEntrypoint {
});
}
+ // llms-full.txt bundles are read from the VENDORED_MARKDOWN binding, keyed
+ // by the request path without its leading slash.
+ if (request.url.endsWith("/llms-full.txt")) {
+ const { pathname } = new URL(request.url);
+ const res = await this.env.VENDORED_MARKDOWN.get(pathname.slice(1));
+
+ // No object under that key: answer 404 rather than 200 with an empty body.
+ if (!res) {
+ return new Response("Not Found", { status: 404 });
+ }
+
+ return new Response(res.body, {
+ headers: {
+ "Content-Type": "text/markdown; charset=utf-8",
+ },
+ });
+ }
+
if (request.url.endsWith("/index.md")) {
const htmlUrl = request.url.replace("index.md", "");
const res = await this.env.ASSETS.fetch(htmlUrl, request);
diff --git a/worker/index.worker.test.ts b/worker/index.worker.test.ts
index 7916f337b989222..19d02a57e68b2d4 100644
--- a/worker/index.worker.test.ts
+++ b/worker/index.worker.test.ts
@@ -204,41 +204,6 @@ describe("Cloudflare Docs", () => {
const text = await response.text();
expect(text).toContain("# Cloudflare Developer Documentation");
});
-
- it("llms-full.txt", async () => {
- const request = new Request("http://fakehost/llms-full.txt");
- const response = await SELF.fetch(request);
-
- expect(response.status).toBe(200);
-
- const text = await response.text();
- expect(text).toContain("URL: https://developers.cloudflare.com/");
- expect(text).toContain('from "~/components"');
- });
-
- it("product-specific llms-full.txt", async () => {
- const request = new Request("http://fakehost/workers/llms-full.txt");
- const response = await SELF.fetch(request);
-
- expect(response.status).toBe(200);
-
- const text = await response.text();
- expect(text).toContain("URL: https://developers.cloudflare.com/");
- expect(text).toContain('from "~/components"');
- });
-
- it("area-specific llms-full.txt", async () => {
- const request = new Request(
- "http://fakehost/developer-platform/llms-full.txt",
- );
- const response = await SELF.fetch(request);
-
- expect(response.status).toBe(200);
-
- const text = await response.text();
- expect(text).toContain("URL: https://developers.cloudflare.com/");
- expect(text).toContain('from "~/components"');
- });
});
describe("index.md handling", () => {