
Commit

Merge pull request #17 from justyns/justyns/issue4
Add ability to configure multiple text and image models, and switch between them
justyns authored Mar 1, 2024
2 parents aacd5d4 + c8832ab commit 250a1a0
Showing 6 changed files with 409 additions and 132 deletions.
95 changes: 76 additions & 19 deletions README.md
@@ -36,6 +36,8 @@ The resulting image is then uploaded to the space and inserted into the note wit
New responses are always appended to the end of the page.
- **AI: Execute AI Prompt from Custom Template**: Prompts the user to select a template, renders that template, sends it to the LLM, and then inserts the result into the page.
Valid templates must have a value for `aiprompt.description` in the frontmatter.
- **AI: Select Text Model from Config**: Prompts the user to select a text model from the models configured under `ai.textModels` and makes it the active text model.
- **AI: Select Image Model from Config**: Prompts the user to select an image model from the models configured under `ai.imageModels` and makes it the active image model.

<!-- end-commands-and-functions -->
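A minimal custom prompt template might look like the following. The `aiprompt.description` frontmatter key is the documented requirement; the body text is only an illustrative sketch, not a prescribed format:

```markdown
---
aiprompt:
  description: "Summarize the current note"
---
Please summarize the following note in three short bullet points.
```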

@@ -49,20 +51,42 @@ If you do not have a SECRETS page, create one and name it `SECRETS`. Then, insert the following:
OPENAI_API_KEY: "openai key here"
```

OPENAI_API_KEY is required for any model currently, but may not get used for local models that don't use keys.
OPENAI_API_KEY is required for any openai api compatible model currently, but may not get used for local models that don't use keys.

The secret does not necessarily have to be `OPENAI_API_KEY`; it can be any name you want, as long as you also change the `secretName` for the model to match. This allows you to have multiple API keys for the same provider, for example.
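For instance (the names here are illustrative, not required), you could store a second key in `SECRETS`:

```yaml
OPENAI_WORK_KEY: "second openai key here"
```

and reference it from a model entry in `SETTINGS` via `secretName`:

```yaml
ai:
  textModels:
    - name: gpt-4-turbo-work
      provider: openai
      modelName: gpt-4-0125-preview
      secretName: OPENAI_WORK_KEY
```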

### Configuration

To change the text generation model used by all commands, or other configurable options, open your `SETTINGS` page and change the setting below:

```yaml
ai:
# By default, gpt-3.5-turbo is used, but can be changed if desired.
# defaultTextModel: gpt-4-0125-preview
defaultTextModel: gpt-3.5-turbo
# Any openai compatible API _should_ be usable, but no testing is done on that currently
openAIBaseUrl: "https://api.openai.com/v1"
dallEBaseUrl: "https://api.openai.com/v1"
# Configure one or more image models. Only OpenAI's API is currently supported
imageModels:
- name: dall-e-3
modelName: dall-e-3
provider: dalle
- name: dall-e-2
modelName: dall-e-2
provider: dalle

# Configure one or more text models
# Provider may be openai or gemini. Most local or self-hosted LLMs offer an openai compatible api, so choose openai as the provider for those and change the baseUrl accordingly.
textModels:
- name: ollama-phi-2
modelName: phi-2
provider: openai
baseUrl: http://localhost:11434/v1
requireAuth: false
- name: gpt-4-turbo
provider: openai
modelName: gpt-4-0125-preview
- name: gpt-4-vision-preview
provider: openai
modelName: gpt-4-vision-preview
- name: gpt-3-turbo
provider: openai
modelName: gpt-3.5-turbo-0125

# Chat section is optional, but may help provide better results when using the Chat On Page command
chat:
@@ -79,10 +103,13 @@ To use Ollama locally, make sure you have it running first and the desired model downloaded:

```yaml
ai:
# Run `ollama list` to see a list of models downloaded
defaultTextModel: phi
openAIBaseUrl: http://localhost:11434/v1
requireAuth: false
textModels:
- name: ollama-phi-2
# Run `ollama list` to see a list of models downloaded
modelName: phi
provider: openai
baseUrl: http://localhost:11434/v1
requireAuth: false
```
**requireAuth**: When using Ollama with Chrome, requireAuth needs to be set to false so that the Authorization header isn't sent. Otherwise you will get a CORS error.
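As a quick local sanity check (assuming a default Ollama install listening on port 11434; the model name is just an example):

```shell
# Download a model and confirm it shows up in the local list
ollama pull phi
ollama list

# Ollama's native API should return the installed models as JSON
curl http://localhost:11434/api/tags
```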
@@ -93,20 +120,28 @@ Mistral.ai is a hosted service that offers an openai-compatible api. You can use it with settings like this:
```yaml
ai:
defaultTextModel: mistral-medium
openAIBaseUrl: https://api.mistral.ai/v1
textModels:
- name: mistral-medium
modelName: mistral-medium
provider: openai
baseUrl: https://api.mistral.ai/v1
secretName: MISTRAL_API_KEY
```
`OPENAI_API_KEY` also needs to be set in `SECRETS` to an API key generated from their web console.
`MISTRAL_API_KEY` also needs to be set in `SECRETS` using an api key generated from their web console.


#### Perplexity.ai

Perplexity.ai is another hosted service that offers an openai-compatible api and [various models](https://docs.perplexity.ai/docs/model-cards). You can use it with settings like this:

```yaml
ai:
defaultTextModel: sonar-medium-online
openAIBaseUrl: https://api.perplexity.ai
textModels:
- name: sonar-medium-online
modelName: sonar-medium-online
provider: openai
baseUrl: https://api.perplexity.ai
```

`OPENAI_API_KEY` also needs to be set in `SECRETS` to an API key generated from [their web console](https://www.perplexity.ai/settings/api).
@@ -119,15 +154,37 @@ To configure it, you can use these settings:

```yaml
ai:
secretName: GOOGLE_AI_STUDIO_KEY
provider: Gemini
defaultTextModel: gemini-pro
textModels:
- name: gemini-pro
modelName: gemini-pro
provider: gemini
baseUrl: https://api.gemini.ai/v1
secretName: GOOGLE_AI_STUDIO_KEY
```

**Note**: The `secretName` defined here means you need to put the API key from [google ai studio](https://aistudio.google.com/app/apikey) in your SECRETS file as `GOOGLE_AI_STUDIO_KEY`.

**Note 2**: AI Studio is not the same as the Gemini App (previously Bard). You may have access to https://gemini.google.com/app but it does not offer an api key needed for integrating 3rd party tools. Instead, you need access to https://aistudio.google.com/app specifically.


#### Dall-E

Dall-E can be configured for image generation with these settings:

```yaml
ai:
imageModels:
- name: dall-e-3
modelName: dall-e-3
provider: dalle
- name: dall-e-2
modelName: dall-e-2
provider: dalle
```

`OPENAI_API_KEY` also needs to be set in `SECRETS` to an API key generated in the OpenAI web console.
`baseUrl` can also be set to point at another API compatible with OpenAI's DALL·E endpoint.
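As a sketch, pointing image generation at a different compatible endpoint could look like this (the URL is a placeholder assumption, not a real service):

```yaml
ai:
  imageModels:
    - name: dall-e-3-proxy
      modelName: dall-e-3
      provider: dalle
      baseUrl: "https://my-openai-proxy.example.com/v1"
```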

#### Chat Custom Instructions

OpenAI introduced [custom instructions for ChatGPT](https://openai.com/blog/custom-instructions-for-chatgpt) a while back to help improve the responses from ChatGPT. We are emulating that feature by allowing a system prompt to be injected into each new chat session.
71 changes: 69 additions & 2 deletions sbai.ts
@@ -6,18 +6,27 @@ import { editor, markdown, space } from "$sb/syscalls.ts";
import { decodeBase64 } from "https://deno.land/std@0.216.0/encoding/base64.ts";
import { getPageLength, getSelectedTextOrNote } from "./src/editorUtils.ts";
import {
aiSettings,
ChatMessage,
chatSystemPrompt,
configureSelectedImageModel,
configureSelectedModel,
currentAIProvider,
currentImageProvider,
ImageModelConfig,
initializeOpenAI,
initIfNeeded,
ModelConfig,
setSelectedImageModel,
setSelectedTextModel,
} from "./src/init.ts";
import { generateImageWithDallE } from "./src/openai.ts";
import {
convertPageToMessages,
enrichChatMessages,
folderName,
} from "./src/utils.ts";
import { ImageGenerationOptions } from "./src/interfaces.ts";

/**
* Reloads the api key and aiSettings object if one of the pages change.
@@ -27,10 +36,61 @@ import {
*/
export async function reloadConfig(pageName: string) {
if (pageName === "SETTINGS" || pageName === "SECRETS") {
await initializeOpenAI();
await initializeOpenAI(true);
}
}

export async function selectModelFromConfig() {
if (!aiSettings || !aiSettings.textModels) {
await initializeOpenAI(false);
}
const modelOptions = aiSettings.textModels.map((model) => ({
...model,
name: model.name,
description: model.description || `${model.modelName} on ${model.provider}`,
}));
const selectedModel = await editor.filterBox("Select a model", modelOptions);

if (!selectedModel) {
await editor.flashNotification("No model selected.", "error");
return;
}
const selectedModelName = selectedModel.name;
await setSelectedTextModel(selectedModel as ModelConfig);
await configureSelectedModel(selectedModel as ModelConfig);

await editor.flashNotification(`Selected model: ${selectedModelName}`);
console.log(`Selected model:`, selectedModel);
}

export async function selectImageModelFromConfig() {
if (!aiSettings || !aiSettings.imageModels) {
await initializeOpenAI(false);
}
const imageModelOptions = aiSettings.imageModels.map((model) => ({
...model,
name: model.name,
description: model.description || `${model.modelName} on ${model.provider}`,
}));
const selectedImageModel = await editor.filterBox(
"Select an image model",
imageModelOptions,
);

if (!selectedImageModel) {
await editor.flashNotification("No image model selected.", "error");
return;
}
const selectedImageModelName = selectedImageModel.name;
await setSelectedImageModel(selectedImageModel as ImageModelConfig);
await configureSelectedImageModel(selectedImageModel as ImageModelConfig);

await editor.flashNotification(
`Selected image model: ${selectedImageModelName}`,
);
console.log(`Selected image model:`, selectedImageModel);
}

/**
* Prompts the user for a custom prompt to send to the LLM. If the user has text selected, the selected text is used as the note content.
* If the user has no text selected, the entire note is used as the note content.
@@ -194,7 +254,14 @@ export async function promptAndGenerateImage() {
return;
}

const imageData = await generateImageWithDallE(prompt, 1);
const imageOptions: ImageGenerationOptions = {
prompt: prompt,
numImages: 1,
size: "1024x1024",
quality: "hd",
};
const imageData = await currentImageProvider.generateImage(imageOptions);

if (imageData && imageData.data && imageData.data.length > 0) {
const base64Image = imageData.data[0].b64_json;
const revisedPrompt = imageData.data[0].revised_prompt;
8 changes: 8 additions & 0 deletions silverbullet-ai.plug.yaml
@@ -45,4 +45,12 @@ functions:
path: src/prompts.ts:insertAiPromptFromTemplate
command:
name: "AI: Execute AI Prompt from Custom Template"
selectTextModel:
path: sbai.ts:selectModelFromConfig
command:
name: "AI: Select Text Model from Config"
selectImageModel:
path: sbai.ts:selectImageModelFromConfig
command:
name: "AI: Select Image Model from Config"

