
Point to gguf format for lora
ltoniazzi authored and amakropoulos committed Aug 21, 2024
1 parent f99ee2f commit 225957d
Showing 2 changed files with 17 additions and 17 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -248,7 +248,7 @@ public class MyScript : MonoBehaviour
// Otherwise the model file can be copied directly inside the StreamingAssets folder.
llm.SetModel("Phi-3-mini-4k-instruct-q4.gguf");
// optional: you can also set a lora in a similar fashion
llm.SetLora("my-lora.bin");
llm.SetLora("my-lora.gguf");
// optional: you can set the chat template of the model if it is not correctly identified
// You can find a list of chat templates in the ChatTemplate.templates.Keys
llm.SetTemplate("phi-3");
@@ -374,8 +374,8 @@ If the user's GPU is not supported, the LLM will fall back to the CPU

- <details><summary>Advanced options</summary>

- - `Download lora` click to download a LoRA model in .bin format
- - `Load lora` click to load a LoRA model in .bin format
+ - `Download lora` click to download a LoRA model in .gguf format
+ - `Load lora` click to load a LoRA model in .gguf format
- <details><summary><code>Context Size</code> size of the prompt context (0 = context size of the model)</summary> This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU). </details>
- `Batch Size` batch size for prompt processing (default: 512)
- `Model` the path of the model being used (relative to the Assets/StreamingAssets folder)
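As an aside (not part of this diff): a minimal sketch of the updated .gguf workflow in user code. The `LLMUnity` namespace is assumed, and the file names are placeholders mirroring the `MyScript` example above.

```csharp
using UnityEngine;
using LLMUnity;  // assumed package namespace

public class LoraSetup : MonoBehaviour
{
    public LLM llm;  // assign the LLM component in the Inspector

    void Awake()
    {
        // SetLora resets the adapter list to a single entry;
        // AddLora appends further adapters. Both now expect .gguf files.
        llm.SetLora("base-style.gguf");
        llm.AddLora("extra-domain.gguf");
    }
}
```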
28 changes: 14 additions & 14 deletions Runtime/LLM.cs
@@ -22,7 +22,7 @@ public LLMException(string message, int errorCode) : base(message)
}
}

- public class DestroyException : Exception {}
+ public class DestroyException : Exception { }
/// \endcond

[DefaultExecutionOrder(-1)]
@@ -72,7 +72,7 @@ public class LLM : MonoBehaviour
/// <summary> Chat template used for the model </summary>
[ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
/// <summary> the paths of the LORA models being used (relative to the Assets/StreamingAssets folder).
- /// Models with .bin format are allowed.</summary>
+ /// Models with .gguf format are allowed.</summary>
[ModelAdvanced] public string lora = "";

/// \cond HIDE
@@ -192,9 +192,9 @@ public void SetModel(string path)
/// <summary>
/// Allows to set a LORA model to use in the LLM.
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
- /// Models supported are in .bin format.
+ /// Models supported are in .gguf format.
/// </summary>
- /// <param name="path">path to LORA model to use (.bin format)</param>
+ /// <param name="path">path to LORA model to use (.gguf format)</param>
public void SetLora(string path)
{
lora = "";
@@ -204,9 +204,9 @@ public void SetLora(string path)
/// <summary>
/// Allows to add a LORA model to use in the LLM.
/// The model provided is copied to the Assets/StreamingAssets folder that allows it to also work in the build.
- /// Models supported are in .bin format.
+ /// Models supported are in .gguf format.
/// </summary>
- /// <param name="path">path to LORA model to use (.bin format)</param>
+ /// <param name="path">path to LORA model to use (.gguf format)</param>
public void AddLora(string path)
{
string loraPath = GetModelLoraPath(path, true);
@@ -220,9 +220,9 @@ public void AddLora(string path)

/// <summary>
/// Allows to remove a LORA model from the LLM.
- /// Models supported are in .bin format.
+ /// Models supported are in .gguf format.
/// </summary>
- /// <param name="path">path to LORA model to remove (.bin format)</param>
+ /// <param name="path">path to LORA model to remove (.gguf format)</param>
public void RemoveLora(string path)
{
string loraPath = GetModelLoraPath(path, true);
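For context on the three methods above: `lora` is kept as a single space-separated string of resolved paths (see the `lora.Split(" ")` call in `StartService` below). A hedged illustration, with the `GetModelLoraPath` resolution omitted for brevity:

```csharp
// Illustration only: how the lora field evolves (paths shown unresolved;
// the real code routes each path through GetModelLoraPath first).
llm.SetLora("a.gguf");    // lora == "a.gguf"
llm.AddLora("b.gguf");    // lora == "a.gguf b.gguf"
llm.RemoveLora("a.gguf"); // lora == "b.gguf"
```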
@@ -373,7 +373,7 @@ void CallIfNotDestroyed(EmptyCallback fn)
private void InitService(string arguments)
{
if (debug) CallIfNotDestroyed(() => SetupLogging());
- CallIfNotDestroyed(() => {LLMObject = llmlib.LLM_Construct(arguments);});
+ CallIfNotDestroyed(() => { LLMObject = llmlib.LLM_Construct(arguments); });
if (remote) CallIfNotDestroyed(() => llmlib.LLM_StartServer(LLMObject));
CallIfNotDestroyed(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
CallIfNotDestroyed(() => CheckLLMStatus(false));
@@ -383,7 +383,7 @@ private void StartService()
{
llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
llmThread.Start();
- while (!llmlib.LLM_Started(LLMObject)) {}
+ while (!llmlib.LLM_Started(LLMObject)) { }
loraWeights = new List<float>();
for (int i = 0; i < lora.Split(" ").Count(); i++) loraWeights.Add(1f);
started = true;
@@ -446,7 +446,7 @@ void AssertStarted()

void CheckLLMStatus(bool log = true)
{
- if (llmlib == null) {return;}
+ if (llmlib == null) { return; }
IntPtr stringWrapper = llmlib.StringWrapper_Construct();
int status = llmlib.LLM_Status(LLMObject, stringWrapper);
string result = llmlib.GetStringWrapperResult(stringWrapper);
@@ -553,7 +553,7 @@ public async Task<string> SetLoraScale(string loraToScale, float scale)
loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
for (int i = 0; i < loraWeights.Count; i++)
{
- loraWeightRequest.loraWeights.Add(new LoraWeightRequest() {id = i, scale = loraWeights[i]});
+ loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = loraWeights[i] });
}
;
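Every adapter's weight is initialised to 1.0 when the service starts (see `StartService` above); `SetLoraScale` rescales one of them at runtime. A hedged usage sketch, assuming an adapter was loaded earlier under the name shown:

```csharp
// Sketch: halve the influence of one loaded adapter.
// "extra-domain.gguf" is a placeholder and must match a loaded adapter.
await llm.SetLoraScale("extra-domain.gguf", 0.5f);
```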

@@ -607,7 +607,7 @@ public async Task<string> Slot(string json)
public async Task<string> Completion(string json, Callback<string> streamCallback = null)
{
AssertStarted();
- if (streamCallback == null) streamCallback = (string s) => {};
+ if (streamCallback == null) streamCallback = (string s) => { };
StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
if (!started) return null;
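A hedged calling sketch for `Completion`. The payload fields mirror llama.cpp's completion request (`prompt` and `n_predict` also appear in `SystemPromptRequest` below), but the exact JSON shape is an assumption here:

```csharp
// Sketch: request a completion and stream partial results to the console.
// Assumes the service has started; the JSON is hand-built for brevity.
string json = "{\"prompt\": \"Hello\", \"n_predict\": 32}";
string reply = await llm.Completion(json, chunk => Debug.Log(chunk));
```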
@@ -621,7 +621,7 @@ public async Task<string> Completion(string json, Callback<string> streamCallback = null)
public async Task SetBasePrompt(string base_prompt)
{
AssertStarted();
- SystemPromptRequest request = new SystemPromptRequest(){system_prompt = base_prompt, prompt = " ", n_predict = 0};
+ SystemPromptRequest request = new SystemPromptRequest() { system_prompt = base_prompt, prompt = " ", n_predict = 0 };
await Completion(JsonUtility.ToJson(request));
}
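Correspondingly, a one-line usage sketch for `SetBasePrompt`, which caches a system prompt without generating any tokens (`n_predict = 0` above):

```csharp
// Sketch: install a base/system prompt for subsequent completions.
await llm.SetBasePrompt("You are a helpful assistant.");
```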

