allow to set the slot of LLMCharacter
amakropoulos committed Sep 5, 2024
1 parent 6973753 commit 0f56a0a
Showing 3 changed files with 14 additions and 7 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -376,7 +376,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU
- `Debug` select to log the output of the model in the Unity Editor
- <details><summary>Advanced options</summary>

- - `Parallel Prompts` number of prompts that can happen in parallel (default: -1 = number of LLMCharacter objects)
+ - <details><summary><code>Parallel Prompts</code> number of prompts / slots that can happen in parallel (default: -1 = number of LLMCharacter objects). Note that the context size is divided among the slots.</summary> If you want to retain as much context as possible for the LLM and don't need all the characters present at the same time, you can set this number and specify the slot for each LLMCharacter object.
+ E.g. setting `Parallel Prompts` to 1 and slot 0 for all LLMCharacter objects will use the full context, but the entire prompt will need to be computed (no caching) whenever a LLMCharacter object is used for chat. </details>
- `Dont Destroy On Load` select to not destroy the LLM GameObject when loading a new Scene

</details>
@@ -441,6 +442,7 @@ If it is not selected, the full reply from the model is received in one go
- `Load grammar` click to load a grammar in .gbnf format
- `Grammar` the path of the grammar being used (relative to the Assets/StreamingAssets folder)
- <details><summary><code>Cache Prompt</code> save the ongoing prompt from the chat (default: true)</summary> Saves the prompt while it is being created by the chat to avoid reprocessing the entire prompt every time</details>
+ - `Slot` slot of the server to use for computation. Value can be set from 0 to `Parallel Prompts`-1 (default: -1 = new slot for each character)
- `Seed` seed for reproducibility. For random results every time use -1
- <details><summary><code>Num Predict</code> maximum number of tokens to predict (default: 256, -1 = infinity, -2 = until context filled)</summary>This is the maximum number of tokens the model will predict. When N tokens are reached the model stops generating. This means words / sentences might not get finished if this is too low. </details>
- <details><summary><code>Temperature</code> LLM temperature, lower values give more deterministic answers (default: 0.2)</summary>The temperature setting adjusts how random the generated responses are. Turning it up makes the generated choices more varied and unpredictable. Turning it down makes the generated responses more predictable and focused on the most likely options.</details>
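To illustrate the two options documented above, here is a minimal sketch (not part of this commit) of the configuration the README describes: a single shared slot so the full context is available to every character. It assumes the `LLMUnity` namespace and the public `parallelPrompts` and `slot` fields shown in this diff; in practice these values would normally be set in the Inspector rather than from code.

```csharp
// Hypothetical example, not part of this commit: two characters share one slot,
// so the full context size is available, at the cost of losing the
// per-character prompt cache when switching between them.
using UnityEngine;
using LLMUnity;

public class SharedSlotSetup : MonoBehaviour
{
    public LLM llm;
    public LLMCharacter characterA;
    public LLMCharacter characterB;

    void Awake()
    {
        llm.parallelPrompts = 1;  // one server slot -> full context size
        characterA.slot = 0;      // both characters pinned to slot 0
        characterB.slot = 0;      // switching characters recomputes the prompt
    }
}
```

With this configuration, switching between characterA and characterB forces the prompt to be recomputed, as the README text above notes.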
4 changes: 3 additions & 1 deletion Runtime/LLM.cs
@@ -449,7 +449,9 @@ private void StartService()
public int Register(LLMCharacter llmCharacter)
{
clients.Add(llmCharacter);
- return clients.IndexOf(llmCharacter);
+ int index = clients.IndexOf(llmCharacter);
+ if (parallelPrompts != -1) return index % parallelPrompts;
+ return index;
}

protected int GetNumClients()
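To make the wrap-around in `Register` explicit, the standalone sketch below (illustration only, not repository code, simplified names) captures the same rule: with `parallelPrompts = 2`, successively registered characters receive slots 0, 1, 0, 1, ..., while the default of -1 gives every character its own slot.

```csharp
// Standalone illustration of the slot assignment in LLM.Register.
static class SlotAssignmentExample
{
    // With parallelPrompts = 2, registration indices 0,1,2,3 map to slots 0,1,0,1.
    public static int AssignSlot(int registrationIndex, int parallelPrompts)
    {
        // Fixed number of slots: wrap the registration order onto [0, parallelPrompts - 1].
        if (parallelPrompts != -1) return registrationIndex % parallelPrompts;
        // Default (-1): one new slot per registered character.
        return registrationIndex;
    }
}
```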
13 changes: 8 additions & 5 deletions Runtime/LLMCharacter.cs
@@ -45,6 +45,8 @@ public class LLMCharacter : MonoBehaviour
[ModelAdvanced] public string grammar = null;
/// <summary> option to cache the prompt as it is being created by the chat to avoid reprocessing the entire prompt every time (default: true) </summary>
[ModelAdvanced] public bool cachePrompt = true;
+ /// <summary> specify which slot of the server to use for computation (affects caching) </summary>
+ [ModelAdvanced] public int slot = -1;
/// <summary> seed for reproducibility. For random results every time set to -1. </summary>
[ModelAdvanced] public int seed = 0;
/// <summary> number of tokens to predict (-1 = infinity, -2 = until context filled).
@@ -123,7 +125,6 @@ public class LLMCharacter : MonoBehaviour
private string chatTemplate;
private ChatTemplate template = null;
public string grammarString;
- protected int id_slot = -1;
private List<(string, string)> requestHeaders = new List<(string, string)> { ("Content-Type", "application/json") };
private List<UnityWebRequest> WIPRequests = new List<UnityWebRequest>();
/// \endcond
@@ -149,7 +150,8 @@ public void Awake()
LLMUnitySetup.LogError($"No LLM assigned or detected for LLMCharacter {name}!");
return;
}
- id_slot = llm.Register(this);
+ int slotFromServer = llm.Register(this);
+ if (slot == -1) slot = slotFromServer;
}

InitGrammar();
@@ -159,6 +161,7 @@
void OnValidate()
{
AssignLLM();
+ if (llm != null && llm.parallelPrompts > -1 && (slot < -1 || slot >= llm.parallelPrompts)) LLMUnitySetup.LogError($"The slot needs to be between 0 and {llm.parallelPrompts-1}, or -1 to be automatically set");
}

void Reset()
@@ -358,7 +361,7 @@ ChatRequest GenerateRequest(string prompt)
ChatRequest chatRequest = new ChatRequest();
if (debugPrompt) LLMUnitySetup.Log(prompt);
chatRequest.prompt = prompt;
- chatRequest.id_slot = id_slot;
+ chatRequest.id_slot = slot;
chatRequest.temperature = temperature;
chatRequest.top_k = topK;
chatRequest.top_p = topP;
@@ -613,7 +616,7 @@ public async Task<List<float>> Embeddings(string query, Callback<List<float>> ca
private async Task<string> Slot(string filepath, string action)
{
SlotRequest slotRequest = new SlotRequest();
- slotRequest.id_slot = id_slot;
+ slotRequest.id_slot = slot;
slotRequest.filepath = filepath;
slotRequest.action = action;
string json = JsonUtility.ToJson(slotRequest);
@@ -683,7 +686,7 @@ protected Ret ConvertContent<Res, Ret>(string response, ContentCallback<Res, Ret

protected void CancelRequestsLocal()
{
- if (id_slot >= 0) llm.CancelRequest(id_slot);
+ if (slot >= 0) llm.CancelRequest(slot);
}

protected void CancelRequestsRemote()
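Taken together, the `LLMCharacter` changes implement a simple slot-resolution rule. The sketch below is a paraphrase with simplified names, not code from the repository: an explicit `slot` value wins, -1 falls back to the slot returned by `LLM.Register`, and values outside the valid range are flagged during validation (the real `OnValidate` logs an error rather than throwing).

```csharp
// Paraphrase (simplified names) of the rule spread across Awake() and OnValidate() above.
static class SlotResolutionExample
{
    public static int ResolveSlot(int requestedSlot, int serverAssignedSlot, int parallelPrompts)
    {
        // OnValidate: with a fixed slot count, only -1 or 0..parallelPrompts-1 are acceptable.
        if (parallelPrompts > -1 && (requestedSlot < -1 || requestedSlot >= parallelPrompts))
            throw new System.ArgumentOutOfRangeException(nameof(requestedSlot));
        // Awake: -1 means "use the slot handed out by LLM.Register".
        return requestedSlot == -1 ? serverAssignedSlot : requestedSlot;
    }
}
```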
