diff --git a/.github/workflows/publish-production.yml b/.github/workflows/publish-production.yml index 1ab4240be234953..a2a12c1229ebff4 100644 --- a/.github/workflows/publish-production.yml +++ b/.github/workflows/publish-production.yml @@ -41,6 +41,11 @@ jobs: cd distmd && zip -r markdown.zip . npx wrangler r2 object put vendored-markdown/markdown.zip --file=markdown.zip --remote rm markdown.zip + + cd distllms + for file in $(find . -type f); do + npx wrangler r2 object put vendored-markdown/$file --file=$file --remote + done - name: Upload vendored Markdown files to ZT DevDocs bucket env: AWS_ACCESS_KEY_ID: ${{ secrets.ZT_DEVDOCS_ACCESS_KEY_ID }} diff --git a/.gitignore b/.gitignore index e3de49811f983d3..501cf6fc5247c53 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # build output dist/ distmd/ +distllms/ # generated types .astro/ diff --git a/bin/generate-index-md.ts b/bin/generate-index-md.ts index 276a4e9b337fe70..9723feb9064d6a2 100644 --- a/bin/generate-index-md.ts +++ b/bin/generate-index-md.ts @@ -1,12 +1,23 @@ -import { readFileSync, writeFileSync, mkdirSync } from "node:fs"; +import { + readFileSync, + writeFileSync, + mkdirSync, + appendFileSync, +} from "node:fs"; import glob from "fast-glob"; import { parse } from "node-html-parser"; import { htmlToMarkdown } from "~/util/markdown"; +import YAML from "yaml"; + const files = await glob("dist/**/*.html"); for (const file of files) { + if (file === "dist/index.html" || file === "dist/404.html") { + continue; + } + const html = readFileSync(file, "utf-8"); const dom = parse(html); @@ -24,9 +35,42 @@ for (const file of files) { continue; } + const product = file.split("/")[1]; const path = file.replace("dist/", "distmd/").replace(".html", ".md"); mkdirSync(path.split("/").slice(0, -1).join("/"), { recursive: true }); - writeFileSync(path, markdown); + + const llmsFullContent = ["", markdown, "\n\n"].join("\n"); + + mkdirSync(`distllms/${product}`, { recursive: true }); + appendFileSync("distllms/llms-full.txt", llmsFullContent); + appendFileSync(`distllms/${product}/llms-full.txt`, llmsFullContent); + + try { + const path = await glob(`src/content/products/${product}.*`).then((arr) => + arr.at(0), + ); + + if (!path) { + continue; + } + + const yaml = YAML.parse(readFileSync(path, "utf-8")); + const group = yaml.product?.group?.replaceAll(" ", "-").toLowerCase(); + + if (!group) { + continue; + } + + mkdirSync(`distllms/${group}`, { recursive: true }); + appendFileSync(`distllms/${group}/llms-full.txt`, llmsFullContent); + } catch (error) { + if (error instanceof Error) { + console.error( + `Failed to find a product group for ${product}:`, + error.message, + ); + } + } } diff --git a/src/pages/[area]/llms-full.txt.ts b/src/pages/[area]/llms-full.txt.ts deleted file mode 100644 index 62d5a374ee093f8..000000000000000 --- a/src/pages/[area]/llms-full.txt.ts +++ /dev/null @@ -1,62 +0,0 @@ -import type { APIRoute } from "astro"; -import { getCollection } from "astro:content"; -import { slug } from "github-slugger"; - -export async function getStaticPaths() { - const products = await getCollection("products"); - - const areas = new Set( - products.flatMap((p) => { - if (!p.data.product.group) return []; - - return slug(p.data.product.group.toLowerCase()); - }), - ); - - return [...areas].map((area) => { - return { - params: { - area, - }, - }; - }); -} - -export const GET: APIRoute = async ({ params }) => { - const products = await getCollection("products", (p) => { - if (!p.data.product.group) return false; - - return slug(p.data.product.group.toLowerCase()) === params.area; - }); - - const markdown = await getCollection("docs", (e) => { - if (!e.body) return false; - - if ( - e.id === "warp-client/legal/3rdparty" || - e.id === "magic-wan/legal/3rdparty" - ) - return false; - - return products.some((p) => - e.id.startsWith(p.data.product.url.slice(1, -1)), - ); - }) - .then((entries) => - entries.map((entry) => { - return [ - `# ${entry.data.title}`, - `URL: https://developers.cloudflare.com/${entry.id}/`, - `${entry.body?.trim()}`, - "---", - ].join("\n\n"); - }), - ) - .then((array) => array.join("\n\n")); - - return new Response(markdown, { - headers: { - "content-type": "text/plain", - }, - }); -}; diff --git a/src/pages/[product]/llms-full.txt.ts b/src/pages/[product]/llms-full.txt.ts deleted file mode 100644 index 057b9f6f4544da4..000000000000000 --- a/src/pages/[product]/llms-full.txt.ts +++ /dev/null @@ -1,50 +0,0 @@ -import type { APIRoute } from "astro"; -import { getCollection } from "astro:content"; - -export async function getStaticPaths() { - const products = await getCollection("products", (p) => { - return p.data.product.group; - }); - - return products.map((entry) => { - return { - params: { - product: entry.id, - }, - props: { - product: entry, - }, - }; - }); -} - -export const GET: APIRoute = async ({ props }) => { - const markdown = await getCollection("docs", (e) => { - if ( - e.id === "warp-client/legal/3rdparty" || - e.id === "magic-wan/legal/3rdparty" - ) - return false; - - return ( - e.id.startsWith(props.product.data.product.url.slice(1, -1)) && e.body - ); - }) - .then((entries) => - entries.map((entry) => { - return [ - `# ${entry.data.title}`, - `URL: https://developers.cloudflare.com/${entry.id}/`, - `${entry.body?.trim()}`, - "---", - ].join("\n\n"); - }), - ) - .then((array) => array.join("\n\n")); - - return new Response(markdown, { - headers: { - "content-type": "text/plain", - }, - }); -}; diff --git a/src/pages/llms-full.txt.ts b/src/pages/llms-full.txt.ts deleted file mode 100644 index 12fcd84f7c91a67..000000000000000 --- a/src/pages/llms-full.txt.ts +++ /dev/null @@ -1,33 +0,0 @@ -import type { APIRoute } from "astro"; -import { getCollection } from "astro:content"; - -export const GET: APIRoute = async () => { - const markdown = await getCollection("docs", (e) => { - if (!e.body) return false; - - if ( - e.id === "warp-client/legal/3rdparty" || - e.id === "magic-wan/legal/3rdparty" - ) - return false; - - return true; - }) - .then((entries) => - entries.map((entry) => { - return [ - `# ${entry.data.title}`, - `URL: https://developers.cloudflare.com/${entry.id}/`, - `${entry.body?.trim()}`, - "---", - ].join("\n\n"); - }), - ) - .then((array) => array.join("\n\n")); - - return new Response(markdown, { - headers: { - "content-type": "text/plain", - }, - }); -}; diff --git a/worker/index.ts b/worker/index.ts index 728b316593dd2c7..1e05a69b420ab46 100644 --- a/worker/index.ts +++ b/worker/index.ts @@ -22,6 +22,17 @@ export default class extends WorkerEntrypoint { }); } + if (request.url.endsWith("/llms-full.txt")) { + const { pathname } = new URL(request.url); + const res = await this.env.VENDORED_MARKDOWN.get(pathname.slice(1)); + + return new Response(res?.body, { + headers: { + "Content-Type": "text/markdown; charset=utf-8", + }, + }); + } + if (request.url.endsWith("/index.md")) { const htmlUrl = request.url.replace("index.md", ""); const res = await this.env.ASSETS.fetch(htmlUrl, request); diff --git a/worker/index.worker.test.ts b/worker/index.worker.test.ts index 7916f337b989222..19d02a57e68b2d4 100644 --- a/worker/index.worker.test.ts +++ b/worker/index.worker.test.ts @@ -204,41 +204,6 @@ describe("Cloudflare Docs", () => { const text = await response.text(); expect(text).toContain("# Cloudflare Developer Documentation"); }); - - it("llms-full.txt", async () => { - const request = new Request("http://fakehost/llms-full.txt"); - const response = await SELF.fetch(request); - - expect(response.status).toBe(200); - - const text = await response.text(); - expect(text).toContain("URL: https://developers.cloudflare.com/"); - expect(text).toContain('from "~/components"'); - }); - - it("product-specific llms-full.txt", async () => { - const request = new Request("http://fakehost/workers/llms-full.txt"); - const response = await SELF.fetch(request); - - expect(response.status).toBe(200); - - const text = await response.text(); - expect(text).toContain("URL: https://developers.cloudflare.com/"); - expect(text).toContain('from "~/components"'); - }); - - it("area-specific llms-full.txt", async () => { - const request = new Request( - "http://fakehost/developer-platform/llms-full.txt", - ); - const response = await SELF.fetch(request); - - expect(response.status).toBe(200); - - const text = await response.text(); - expect(text).toContain("URL: https://developers.cloudflare.com/"); - expect(text).toContain('from "~/components"'); - }); }); describe("index.md handling", () => {