diff --git a/astro.config.ts b/astro.config.ts index 2e2e4b9071c80e..4a2f11bf5f7309 100644 --- a/astro.config.ts +++ b/astro.config.ts @@ -147,6 +147,10 @@ export default defineConfig({ "/workers/examples/?languages=*", "/workers/examples/?tags=*", "/workers-ai/models/**", + "**index.md", + "/markdown.zip", + "/style-guide/index.md", + "/style-guide/fixtures/markdown/index.md", ], }), ] diff --git a/src/assets/images/style-guide/how-we-docs/page-options.png b/src/assets/images/style-guide/how-we-docs/page-options.png new file mode 100644 index 00000000000000..3e826f98204a8f Binary files /dev/null and b/src/assets/images/style-guide/how-we-docs/page-options.png differ diff --git a/src/content/docs/style-guide/how-we-docs/ai-consumability.mdx b/src/content/docs/style-guide/how-we-docs/ai-consumability.mdx new file mode 100644 index 00000000000000..284685e2050ae3 --- /dev/null +++ b/src/content/docs/style-guide/how-we-docs/ai-consumability.mdx @@ -0,0 +1,92 @@ +--- +pcx_content_type: how-to +title: AI consumability +meta: + title: AI consumability | How we docs +--- + +import { Tabs, TabItem, Width } from "~/components"; + +We have various approaches for making our content visible to AI as well as making sure it's easily consumed in a plain-text format. + +## AI discoverability + +The primary proposal in this space is [`llms.txt`](https://llmstxt.org/), offering a well-known path for a Markdown list of all your pages. + +We have implemented `llms.txt`, `llms-full.txt` and also created per-page Markdown links as follows: + +- [`llms.txt`](/llms.txt) +- [`llms-full.txt`](/llms-full.txt) + - We also provide a `llms-full.txt` file on a per-product basis, i.e [`/workers/llms-full.txt`](/workers/llms-full.txt) +- [`/$page/index.md`](index.md) + - Add `/index.md` to the end of any page to get the Markdown version, i.e [`/style-guide/index.md`](/style-guide/index.md) +- [`/markdown.zip`](/markdown.zip) + - An export of all of our documentation in the aforementioned `index.md` format. + +In the top right of this page, you will see a `Page options` button where you can copy the current page as Markdown that can be given to your LLM of choice. + + + ![Page options + button](~/assets/images/style-guide/how-we-docs/page-options.png) + + +## Textual representation of interactive elements + +HTML is easily parsed - after all, the browser has to parse it to decide how to render the page you're reading now - it tends to not be very _portable_. This limitation is especially painful in an AI context, because all the extra presentation information consumes additional tokens. + +For example, given our [`Tabs`](/style-guide/components/tabs/), the panels are hidden until the tab itself is clicked: + + + One Content + Two Content + + +If we run the resulting HTML from this component through a solution like [`turndown`](https://www.npmjs.com/package/turndown): + +```md +- [One](#tab-panel-6) +- [Two](#tab-panel-7) + +One Content + +Two Content +``` + +The references to the panels `id`, usually handled by JavaScript, are visible but non-functional. + +### Turning our components into "Markdownable" HTML + +To solve this, we created a [`rehype plugin`](https://github.com/cloudflare/cloudflare-docs/blob/d5a19deded110bce6a7c5d45e702d36527da0a4e/src/plugins/rehype/filter-elements.ts) for: + +- Removing non-content tags (`script`, `style`, `link`, etc) via a [tags allowlist](https://github.com/cloudflare/cloudflare-docs/blob/d5a19deded110bce6a7c5d45e702d36527da0a4e/src/plugins/rehype/filter-elements.ts#L19-L104) +- [Transforming custom elements](https://github.com/cloudflare/cloudflare-docs/blob/d5a19deded110bce6a7c5d45e702d36527da0a4e/src/plugins/rehype/filter-elements.ts#L189-L227) like `starlight-tabs` into standard unordered lists +- [Adapting our Expressive Code codeblocks HTML](https://github.com/cloudflare/cloudflare-docs/blob/d5a19deded110bce6a7c5d45e702d36527da0a4e/src/plugins/rehype/filter-elements.ts#L143-L178) to the [HTML that CommonMark expects](https://spec.commonmark.org/0.31.2/#example-142) + +Taking the `Tabs` example from the previous section and running it through our plugin will now give us a normal unordered list with the content properly associated with a given list item: + +```md +- One + + One Content + +- Two + + Two Content +``` + +For example, take a look at our Markdown test fixture (or any page by appending `/index.md` to the URL): + +- [`/style-guide/fixtures/markdown/`](/style-guide/fixtures/markdown/) +- [`/style-guide/fixtures/markdown/index.md`](/style-guide/fixtures/markdown/index.md) + +### Saving on tokens + +Most AI pricing is around input & output tokens and our approach greatly reduces the amount of input tokens required. + +For example, let's take a look at the amount of tokens required for the [Workers Get Started](/workers/get-started/guide/) using [OpenAI's tokenizer](https://platform.openai.com/tokenizer): + +- HTML: 15,229 tokens +- turndown: 3,401 tokens (4.48x less than HTML) +- index.md: 2,110 tokens (7.22x less than HTML) + +When providing our content to AI, we can see a real-world ~7x saving in input tokens cost.