From 74136850eb78d0cd768a58d8ce6abd6c00f360c9 Mon Sep 17 00:00:00 2001 From: Duong Date: Thu, 21 Nov 2024 23:07:35 +0700 Subject: [PATCH 1/5] Init: streaming ability --- app/SharedWebComponents/Pages/Chat.razor | 4 + app/SharedWebComponents/Pages/Chat.razor.cs | 43 +++++- app/SharedWebComponents/Services/ApiClient.cs | 22 +++ .../Extensions/WebApplicationExtensions.cs | 22 +++ .../Services/ReadRetrieveReadChatService.cs | 129 +++++++++++++++++- 5 files changed, 214 insertions(+), 6 deletions(-) diff --git a/app/SharedWebComponents/Pages/Chat.razor b/app/SharedWebComponents/Pages/Chat.razor index a36b4467..ff0ded0f 100644 --- a/app/SharedWebComponents/Pages/Chat.razor +++ b/app/SharedWebComponents/Pages/Chat.razor @@ -88,6 +88,10 @@ OnClick="@OnClearChat" Disabled=@(_isReceivingResponse || _questionAndAnswerMap is { Count: 0 }) /> + + + diff --git a/app/SharedWebComponents/Pages/Chat.razor.cs b/app/SharedWebComponents/Pages/Chat.razor.cs index 8211087a..6e11898a 100644 --- a/app/SharedWebComponents/Pages/Chat.razor.cs +++ b/app/SharedWebComponents/Pages/Chat.razor.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. namespace SharedWebComponents.Pages; +using System.Text; public sealed partial class Chat { @@ -8,6 +9,7 @@ public sealed partial class Chat private UserQuestion _currentQuestion; private string _lastReferenceQuestion = ""; private bool _isReceivingResponse = false; + private bool _useStreaming = false; private readonly Dictionary _questionAndAnswerMap = []; @@ -42,17 +44,50 @@ private async Task OnAskClickedAsync() try { var history = _questionAndAnswerMap - .Where(x => x.Value?.Choices is { Length: > 0}) + .Where(x => x.Value?.Choices is { Length: > 0 }) .SelectMany(x => new ChatMessage[] { new ChatMessage("user", x.Key.Question), new ChatMessage("assistant", x.Value!.Choices[0].Message.Content) }) .ToList(); history.Add(new ChatMessage("user", _userQuestion)); var request = new ChatRequest([.. history], Settings.Overrides); - var result = await ApiClient.ChatConversationAsync(request); - _questionAndAnswerMap[_currentQuestion] = result.Response; - if (result.IsSuccessful) + if (_useStreaming) + { + var streamingResponse = new StringBuilder(); + try + { + await foreach (var chunk in await ApiClient.PostStreamingRequestAsync(request, "api/chat/stream")) + { + streamingResponse.Append(chunk.Text); + + _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( + new[] { + new ResponseChoice(0, + new ResponseMessage("assistant", streamingResponse.ToString()), + null, null) + }, + null); + + StateHasChanged(); + + await Task.Delay(10); + } + } + catch (Exception ex) + { + _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( + Array.Empty(), + ex.Message); + } + } + else + { + var result = await ApiClient.ChatConversationAsync(request); + _questionAndAnswerMap[_currentQuestion] = result.Response; + } + + if (_questionAndAnswerMap[_currentQuestion]?.Error is null) { _userQuestion = ""; _currentQuestion = default; diff --git a/app/SharedWebComponents/Services/ApiClient.cs b/app/SharedWebComponents/Services/ApiClient.cs index 9b15f7fd..462e79f3 100644 --- a/app/SharedWebComponents/Services/ApiClient.cs +++ b/app/SharedWebComponents/Services/ApiClient.cs @@ -92,6 +92,8 @@ public async IAsyncEnumerable GetDocumentsAsync( public Task> ChatConversationAsync(ChatRequest request) => PostRequestAsync(request, "api/chat"); + public Task> ChatConversationStreamingAsync(ChatRequest request) => PostRequestAsync(request, "api/chat/stream"); + private async Task> PostRequestAsync( TRequest request, string apiRoute) where TRequest : ApproachRequest { @@ -133,4 +135,24 @@ private async Task> PostRequestAsync( }; } } + + public async Task> PostStreamingRequestAsync( + TRequest request, string apiRoute) where TRequest : ApproachRequest + { + var json = JsonSerializer.Serialize( + request, + SerializerOptions.Default); + + using var body = new StringContent( + json, Encoding.UTF8, "application/json"); + + var response = await httpClient.PostAsync(apiRoute, body); + + if (response.IsSuccessStatusCode) + { + return response.Content.ReadFromJsonAsAsyncEnumerable(); + } + + throw new HttpRequestException($"HTTP {(int)response.StatusCode} : {response.ReasonPhrase ?? "Unknown error"}"); + } } diff --git a/app/backend/Extensions/WebApplicationExtensions.cs b/app/backend/Extensions/WebApplicationExtensions.cs index 64464f52..69eff585 100644 --- a/app/backend/Extensions/WebApplicationExtensions.cs +++ b/app/backend/Extensions/WebApplicationExtensions.cs @@ -25,6 +25,9 @@ internal static WebApplication MapApi(this WebApplication app) api.MapGet("enableLogout", OnGetEnableLogout); + // Add streaming chat endpoint + api.MapPost("chat/stream", OnPostChatStreamingAsync); + return app; } @@ -86,6 +89,25 @@ private static async Task OnPostChatAsync( return Results.BadRequest(); } + private static async IAsyncEnumerable OnPostChatStreamingAsync( + ChatRequest request, + ReadRetrieveReadChatService chatService, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + if (request is not { History.Length: > 0 }) + { + yield break; + } + + await foreach (var chunk in chatService.ReplyStreamingAsync( + request.History, + request.Overrides, + cancellationToken)) + { + yield return new ChatChunkResponse(chunk.Length, chunk.Text); + } + } + private static async Task OnPostDocumentAsync( [FromForm] IFormFileCollection files, [FromServices] AzureBlobStorageService service, diff --git a/app/backend/Services/ReadRetrieveReadChatService.cs b/app/backend/Services/ReadRetrieveReadChatService.cs index 7c72dcd9..b6429329 100644 --- a/app/backend/Services/ReadRetrieveReadChatService.cs +++ b/app/backend/Services/ReadRetrieveReadChatService.cs @@ -110,7 +110,7 @@ standard plan AND dental AND employee benefit. } else { - documentContents = string.Join("\r", documentContentList.Select(x =>$"{x.Title}:{x.Content}")); + documentContents = string.Join("\r", documentContentList.Select(x => $"{x.Title}:{x.Content}")); } // step 2.5 @@ -140,7 +140,7 @@ standard plan AND dental AND employee benefit. } } - + if (images != null) { var prompt = @$"## Source ## @@ -244,4 +244,129 @@ You answer needs to be a json object with the following format. return new ChatAppResponse(new[] { choice }); } + + public async IAsyncEnumerable ReplyStreamingAsync( + ChatMessage[] history, + RequestOverrides? overrides, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var chat = _kernel.GetRequiredService(); + var embedding = _kernel.GetRequiredService(); + float[]? embeddings = null; + var question = history.LastOrDefault(m => m.IsUser)?.Content is { } userQuestion + ? userQuestion + : throw new InvalidOperationException("User question is null"); + + // Generate embeddings if needed + if (overrides?.RetrievalMode != RetrievalMode.Text && embedding is not null) + { + embeddings = (await embedding.GenerateEmbeddingAsync(question, cancellationToken: cancellationToken)).ToArray(); + } + + // Get search query + string? query = null; + if (overrides?.RetrievalMode != RetrievalMode.Vector) + { + var getQueryChat = new ChatHistory(@"You are a helpful AI assistant, generate search query for followup question. +Make your respond simple and precise. Return the query only, do not return any other text. +e.g. +Northwind Health Plus AND standard plan. +standard plan AND dental AND employee benefit. +"); + + getQueryChat.AddUserMessage(question); + var result = await chat.GetChatMessageContentAsync( + getQueryChat, + cancellationToken: cancellationToken); + + query = result.Content ?? throw new InvalidOperationException("Failed to get search query"); + } + + // Search related documents + var documentContentList = await _searchClient.QueryDocumentsAsync(query, embeddings, overrides, cancellationToken); + string documentContents = documentContentList.Length == 0 + ? "no source available." + : string.Join("\r", documentContentList.Select(x => $"{x.Title}:{x.Content}")); + + // Get images if vision service available + SupportingImageRecord[]? images = default; + if (_visionService is not null) + { + var queryEmbeddings = await _visionService.VectorizeTextAsync(query ?? question, cancellationToken); + images = await _searchClient.QueryImagesAsync(query, queryEmbeddings.vector, overrides, cancellationToken); + } + + // Prepare chat history + var answerChat = new ChatHistory( + "You are a system assistant who helps the company employees with their questions. Be brief in your answers"); + + foreach (var message in history) + { + if (message.IsUser) + { + answerChat.AddUserMessage(message.Content); + } + else + { + answerChat.AddAssistantMessage(message.Content); + } + } + + // Add final prompt with context + if (images != null) + { + var prompt = @$"## Source ## +{documentContents} +## End ## + +Answer question based on available source and images. +Your answer needs to be a json object with answer and thoughts field. +Don't put your answer between ```json and ```, return the json string directly. e.g {{""answer"": ""I don't know"", ""thoughts"": ""I don't know""}}"; + + var tokenRequestContext = new TokenRequestContext(new[] { "https://storage.azure.com/.default" }); + var sasToken = await (_tokenCredential?.GetTokenAsync(tokenRequestContext, cancellationToken) ?? throw new InvalidOperationException("Failed to get token")); + var imageUrls = images.Select(x => $"{x.Url}?{sasToken.Token}").ToArray(); + var collection = new ChatMessageContentItemCollection(); + collection.Add(new TextContent(prompt)); + foreach (var imageUrl in imageUrls) + { + collection.Add(new ImageContent(new Uri(imageUrl))); + } + + answerChat.AddUserMessage(collection); + } + else + { + var prompt = @$" ## Source ## +{documentContents} +## End ## + +You answer needs to be a json object with the following format. +{{ + ""answer"": // the answer to the question, add a source reference to the end of each sentence. e.g. Apple is a fruit [reference1.pdf][reference2.pdf]. If no source available, put the answer as I don't know. + ""thoughts"": // brief thoughts on how you came up with the answer, e.g. what sources you used, what you thought about, etc. +}}"; + answerChat.AddUserMessage(prompt); + } + + var promptExecutingSetting = new OpenAIPromptExecutionSettings + { + MaxTokens = 1024, + Temperature = overrides?.Temperature ?? 0.7, + StopSequences = [], + }; + + // Stream the response + await foreach (var content in chat.GetStreamingChatMessageContentsAsync( + answerChat, + executionSettings: promptExecutingSetting, + kernel: _kernel, + cancellationToken: cancellationToken)) + { + if (content.Content is { Length: > 0 }) + { + yield return new ChatChunkResponse(content.Content.Length, content.Content); + } + } + } } From e0e192b3540edef8dcf66a2e5c14648f6ffd6d84 Mon Sep 17 00:00:00 2001 From: Duong Date: Thu, 21 Nov 2024 23:24:00 +0700 Subject: [PATCH 2/5] response ChatAppResponse --- app/SharedWebComponents/Pages/Chat.razor.cs | 12 +- app/SharedWebComponents/Services/ApiClient.cs | 4 +- .../Extensions/WebApplicationExtensions.cs | 6 +- .../Services/ReadRetrieveReadChatService.cs | 141 +++++++++++++++++- 4 files changed, 146 insertions(+), 17 deletions(-) diff --git a/app/SharedWebComponents/Pages/Chat.razor.cs b/app/SharedWebComponents/Pages/Chat.razor.cs index 6e11898a..4020d189 100644 --- a/app/SharedWebComponents/Pages/Chat.razor.cs +++ b/app/SharedWebComponents/Pages/Chat.razor.cs @@ -54,23 +54,15 @@ private async Task OnAskClickedAsync() if (_useStreaming) { - var streamingResponse = new StringBuilder(); try { - await foreach (var chunk in await ApiClient.PostStreamingRequestAsync(request, "api/chat/stream")) + await foreach (var response in await ApiClient.PostStreamingRequestAsync(request, "api/chat/stream")) { - streamingResponse.Append(chunk.Text); - _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( - new[] { - new ResponseChoice(0, - new ResponseMessage("assistant", streamingResponse.ToString()), - null, null) - }, + response.Choices, null); StateHasChanged(); - await Task.Delay(10); } } diff --git a/app/SharedWebComponents/Services/ApiClient.cs b/app/SharedWebComponents/Services/ApiClient.cs index 462e79f3..a668d757 100644 --- a/app/SharedWebComponents/Services/ApiClient.cs +++ b/app/SharedWebComponents/Services/ApiClient.cs @@ -136,7 +136,7 @@ private async Task> PostRequestAsync( } } - public async Task> PostStreamingRequestAsync( + public async Task> PostStreamingRequestAsync( TRequest request, string apiRoute) where TRequest : ApproachRequest { var json = JsonSerializer.Serialize( @@ -150,7 +150,7 @@ public async Task> PostStreamingRequestAsync if (response.IsSuccessStatusCode) { - return response.Content.ReadFromJsonAsAsyncEnumerable(); + return response.Content.ReadFromJsonAsAsyncEnumerable(); } throw new HttpRequestException($"HTTP {(int)response.StatusCode} : {response.ReasonPhrase ?? "Unknown error"}"); diff --git a/app/backend/Extensions/WebApplicationExtensions.cs b/app/backend/Extensions/WebApplicationExtensions.cs index 69eff585..73e648e1 100644 --- a/app/backend/Extensions/WebApplicationExtensions.cs +++ b/app/backend/Extensions/WebApplicationExtensions.cs @@ -89,7 +89,7 @@ private static async Task OnPostChatAsync( return Results.BadRequest(); } - private static async IAsyncEnumerable OnPostChatStreamingAsync( + private static async IAsyncEnumerable OnPostChatStreamingAsync( ChatRequest request, ReadRetrieveReadChatService chatService, [EnumeratorCancellation] CancellationToken cancellationToken) @@ -99,12 +99,12 @@ private static async IAsyncEnumerable OnPostChatStreamingAsyn yield break; } - await foreach (var chunk in chatService.ReplyStreamingAsync( + await foreach (var response in chatService.ReplyStreamingAsync( request.History, request.Overrides, cancellationToken)) { - yield return new ChatChunkResponse(chunk.Length, chunk.Text); + yield return response; } } diff --git a/app/backend/Services/ReadRetrieveReadChatService.cs b/app/backend/Services/ReadRetrieveReadChatService.cs index b6429329..f86d1f59 100644 --- a/app/backend/Services/ReadRetrieveReadChatService.cs +++ b/app/backend/Services/ReadRetrieveReadChatService.cs @@ -4,6 +4,7 @@ using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Embeddings; +using System.Text; namespace MinimalApi.Services; #pragma warning disable SKEXP0011 // Mark members as static @@ -245,7 +246,7 @@ You answer needs to be a json object with the following format. return new ChatAppResponse(new[] { choice }); } - public async IAsyncEnumerable ReplyStreamingAsync( + public async IAsyncEnumerable ReplyStreamingAsync( ChatMessage[] history, RequestOverrides? overrides, [EnumeratorCancellation] CancellationToken cancellationToken = default) @@ -356,6 +357,13 @@ You answer needs to be a json object with the following format. StopSequences = [], }; + var streamingResponse = new StringBuilder(); + var documentContext = new ResponseContext( + DataPointsContent: documentContentList.Select(x => new SupportingContentRecord(x.Title, x.Content)).ToArray(), + DataPointsImages: images?.Select(x => new SupportingImageRecord(x.Title, x.Url)).ToArray(), + FollowupQuestions: Array.Empty(), // Will be populated after full response + Thoughts: Array.Empty()); // Will be populated after full response + // Stream the response await foreach (var content in chat.GetStreamingChatMessageContentsAsync( answerChat, @@ -365,8 +373,137 @@ You answer needs to be a json object with the following format. { if (content.Content is { Length: > 0 }) { - yield return new ChatChunkResponse(content.Content.Length, content.Content); + streamingResponse.Append(content.Content); + + ChatAppResponse response; + try + { + // Try parse as JSON to extract answer and thoughts + var currentJson = streamingResponse.ToString(); + var answerObject = JsonSerializer.Deserialize(currentJson); + var answer = answerObject.GetProperty("answer").GetString() ?? ""; + var thoughts = answerObject.TryGetProperty("thoughts", out var thoughtsProp) + ? thoughtsProp.GetString() + : ""; + + var responseMessage = new ResponseMessage("assistant", answer); + var updatedContext = documentContext with + { + Thoughts = !string.IsNullOrEmpty(thoughts) + ? new[] { new Thoughts("Thoughts", thoughts!) } + : Array.Empty() + }; + + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: updatedContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); + + response = new ChatAppResponse(new[] { choice }); + } + catch (JsonException) + { + // If JSON parsing fails, return raw content + var responseMessage = new ResponseMessage("assistant", streamingResponse.ToString()); + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: documentContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); + + response = new ChatAppResponse(new[] { choice }); + } + + yield return response; + } + } + + // After streaming complete, add follow-up questions if requested + if (overrides?.SuggestFollowupQuestions is true) + { + ChatAppResponse response; + var finalAnswer = streamingResponse.ToString(); + try + { + var answerObject = JsonSerializer.Deserialize(finalAnswer); + var answer = answerObject.GetProperty("answer").GetString() ?? ""; + var thoughts = answerObject.GetProperty("thoughts").GetString() ?? ""; + + var followUpQuestions = await GenerateFollowUpQuestionsAsync( + answer, + chat, + promptExecutingSetting, + cancellationToken); + + // Add follow-up questions to the answer text, just like in ReplyAsync + var answerWithQuestions = answer; + foreach (var followUpQuestion in followUpQuestions) + { + answerWithQuestions += $" <<{followUpQuestion}>> "; + } + + var responseMessage = new ResponseMessage("assistant", answerWithQuestions); + var finalContext = documentContext with + { + Thoughts = new[] { new Thoughts("Thoughts", thoughts) }, + FollowupQuestions = followUpQuestions + }; + + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: finalContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); + + response = new ChatAppResponse(new[] { choice }); + } + catch (JsonException) + { + // If JSON parsing fails, return raw content + var responseMessage = new ResponseMessage("assistant", streamingResponse.ToString()); + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: documentContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); + + response = new ChatAppResponse(new[] { choice }); } + + yield return response; } } + + private async Task GenerateFollowUpQuestionsAsync( + string answer, + IChatCompletionService chat, + OpenAIPromptExecutionSettings settings, + CancellationToken cancellationToken) + { + var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); + followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated. +# Answer +{answer} + +# Format of the response +Return the follow-up question as a json string list. Don't put your answer between ```json and ```, return the json string directly. +e.g. +[ + ""What is the deductible?"", + ""What is the co-pay?"", + ""What is the out-of-pocket maximum?"" +]"); + + var followUpQuestions = await chat.GetChatMessageContentAsync( + followUpQuestionChat, + settings, + cancellationToken: cancellationToken); + + var followUpQuestionsJson = followUpQuestions.Content ?? throw new InvalidOperationException("Failed to get follow-up questions"); + var followUpQuestionsObject = JsonSerializer.Deserialize(followUpQuestionsJson); + return followUpQuestionsObject.EnumerateArray() + .Select(x => x.GetString()!) + .ToArray(); + } } From 3a128200be1dee642c57b85079c38caa366fc153 Mon Sep 17 00:00:00 2001 From: Duong Date: Thu, 21 Nov 2024 23:34:29 +0700 Subject: [PATCH 3/5] fix warning --- app/SharedWebComponents/Services/ApiClient.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/app/SharedWebComponents/Services/ApiClient.cs b/app/SharedWebComponents/Services/ApiClient.cs index a668d757..2fcfb37d 100644 --- a/app/SharedWebComponents/Services/ApiClient.cs +++ b/app/SharedWebComponents/Services/ApiClient.cs @@ -92,8 +92,6 @@ public async IAsyncEnumerable GetDocumentsAsync( public Task> ChatConversationAsync(ChatRequest request) => PostRequestAsync(request, "api/chat"); - public Task> ChatConversationStreamingAsync(ChatRequest request) => PostRequestAsync(request, "api/chat/stream"); - private async Task> PostRequestAsync( TRequest request, string apiRoute) where TRequest : ApproachRequest { @@ -140,19 +138,20 @@ public async Task> PostStreamingRequestAsync(); + var nullableResponses = response.Content.ReadFromJsonAsAsyncEnumerable(); + return nullableResponses.Where(r => r != null)!; } - + throw new HttpRequestException($"HTTP {(int)response.StatusCode} : {response.ReasonPhrase ?? "Unknown error"}"); } } From b504360c8ec03cd1e45cdba53fc204dd2addd2a7 Mon Sep 17 00:00:00 2001 From: Duong Date: Tue, 10 Dec 2024 16:21:00 +0700 Subject: [PATCH 4/5] streaming --- .../Services/ReadRetrieveReadChatService.cs | 91 ++++++++++--------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/app/backend/Services/ReadRetrieveReadChatService.cs b/app/backend/Services/ReadRetrieveReadChatService.cs index f86d1f59..ff238cbe 100644 --- a/app/backend/Services/ReadRetrieveReadChatService.cs +++ b/app/backend/Services/ReadRetrieveReadChatService.cs @@ -276,11 +276,20 @@ standard plan AND dental AND employee benefit. "); getQueryChat.AddUserMessage(question); - var result = await chat.GetChatMessageContentAsync( + var queryBuilder = new StringBuilder(); + + await foreach (var content in chat.GetStreamingChatMessageContentsAsync( getQueryChat, - cancellationToken: cancellationToken); + kernel: _kernel, + cancellationToken: cancellationToken)) + { + if (content.Content is { Length: > 0 }) + { + queryBuilder.Append(content.Content); + } + } - query = result.Content ?? throw new InvalidOperationException("Failed to get search query"); + query = queryBuilder.ToString() ?? throw new InvalidOperationException("Failed to get search query"); } // Search related documents @@ -430,15 +439,43 @@ You answer needs to be a json object with the following format. var answer = answerObject.GetProperty("answer").GetString() ?? ""; var thoughts = answerObject.GetProperty("thoughts").GetString() ?? ""; - var followUpQuestions = await GenerateFollowUpQuestionsAsync( - answer, - chat, - promptExecutingSetting, - cancellationToken); + // Inline the follow-up questions generation + var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); + followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated. +# Answer +{answer} + +# Format of the response +Return the follow-up question as a json string list. Don't put your answer between ```json and ```, return the json string directly. +e.g. +[ + ""What is the deductible?"", + ""What is the co-pay?"", + ""What is the out-of-pocket maximum?"" +]"); - // Add follow-up questions to the answer text, just like in ReplyAsync + var followUpBuilder = new StringBuilder(); + await foreach (var content in chat.GetStreamingChatMessageContentsAsync( + followUpQuestionChat, + executionSettings: promptExecutingSetting, + kernel: _kernel, + cancellationToken: cancellationToken)) + { + if (content.Content is { Length: > 0 }) + { + followUpBuilder.Append(content.Content); + } + } + + var followUpQuestionsJson = followUpBuilder.ToString() ?? throw new InvalidOperationException("Failed to get follow-up questions"); + var followUpQuestionsObject = JsonSerializer.Deserialize(followUpQuestionsJson); + var followUpQuestionsList = followUpQuestionsObject.EnumerateArray() + .Select(x => x.GetString()!) + .ToArray(); + + // Add follow-up questions to the answer text var answerWithQuestions = answer; - foreach (var followUpQuestion in followUpQuestions) + foreach (var followUpQuestion in followUpQuestionsList) { answerWithQuestions += $" <<{followUpQuestion}>> "; } @@ -447,7 +484,7 @@ You answer needs to be a json object with the following format. var finalContext = documentContext with { Thoughts = new[] { new Thoughts("Thoughts", thoughts) }, - FollowupQuestions = followUpQuestions + FollowupQuestions = followUpQuestionsList }; var choice = new ResponseChoice( @@ -474,36 +511,4 @@ You answer needs to be a json object with the following format. yield return response; } } - - private async Task GenerateFollowUpQuestionsAsync( - string answer, - IChatCompletionService chat, - OpenAIPromptExecutionSettings settings, - CancellationToken cancellationToken) - { - var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); - followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated. -# Answer -{answer} - -# Format of the response -Return the follow-up question as a json string list. Don't put your answer between ```json and ```, return the json string directly. -e.g. -[ - ""What is the deductible?"", - ""What is the co-pay?"", - ""What is the out-of-pocket maximum?"" -]"); - - var followUpQuestions = await chat.GetChatMessageContentAsync( - followUpQuestionChat, - settings, - cancellationToken: cancellationToken); - - var followUpQuestionsJson = followUpQuestions.Content ?? throw new InvalidOperationException("Failed to get follow-up questions"); - var followUpQuestionsObject = JsonSerializer.Deserialize(followUpQuestionsJson); - return followUpQuestionsObject.EnumerateArray() - .Select(x => x.GetString()!) - .ToArray(); - } } From 81a2d7828af345ae8de31f38caa7478393cc5981 Mon Sep 17 00:00:00 2001 From: Duong Date: Thu, 12 Dec 2024 09:58:09 +0700 Subject: [PATCH 5/5] Update only streaming --- app/SharedWebComponents/Pages/Chat.razor | 4 - app/SharedWebComponents/Pages/Chat.razor.cs | 38 +- app/SharedWebComponents/Services/ApiClient.cs | 58 +-- .../Extensions/WebApplicationExtensions.cs | 21 +- .../Services/ReadRetrieveReadChatService.cs | 370 ++++-------------- 5 files changed, 101 insertions(+), 390 deletions(-) diff --git a/app/SharedWebComponents/Pages/Chat.razor b/app/SharedWebComponents/Pages/Chat.razor index ff0ded0f..a36b4467 100644 --- a/app/SharedWebComponents/Pages/Chat.razor +++ b/app/SharedWebComponents/Pages/Chat.razor @@ -88,10 +88,6 @@ OnClick="@OnClearChat" Disabled=@(_isReceivingResponse || _questionAndAnswerMap is { Count: 0 }) /> - - - diff --git a/app/SharedWebComponents/Pages/Chat.razor.cs b/app/SharedWebComponents/Pages/Chat.razor.cs index 4020d189..f9899dd0 100644 --- a/app/SharedWebComponents/Pages/Chat.razor.cs +++ b/app/SharedWebComponents/Pages/Chat.razor.cs @@ -9,7 +9,6 @@ public sealed partial class Chat private UserQuestion _currentQuestion; private string _lastReferenceQuestion = ""; private bool _isReceivingResponse = false; - private bool _useStreaming = false; private readonly Dictionary _questionAndAnswerMap = []; @@ -45,38 +44,35 @@ private async Task OnAskClickedAsync() { var history = _questionAndAnswerMap .Where(x => x.Value?.Choices is { Length: > 0 }) - .SelectMany(x => new ChatMessage[] { new ChatMessage("user", x.Key.Question), new ChatMessage("assistant", x.Value!.Choices[0].Message.Content) }) + .SelectMany(x => new ChatMessage[] { + new ChatMessage("user", x.Key.Question), + new ChatMessage("assistant", x.Value!.Choices[0].Message.Content) + }) .ToList(); history.Add(new ChatMessage("user", _userQuestion)); var request = new ChatRequest([.. history], Settings.Overrides); - if (_useStreaming) + try { - try - { - await foreach (var response in await ApiClient.PostStreamingRequestAsync(request, "api/chat/stream")) - { - _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( - response.Choices, - null); - - StateHasChanged(); - await Task.Delay(10); - } - } - catch (Exception ex) + var responseStream = await ApiClient.PostStreamingRequestAsync(request, "api/chat/stream"); + + await foreach (var response in responseStream) { _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( - Array.Empty(), - ex.Message); + response.Choices, + null); + + StateHasChanged(); + await Task.Delay(1); } } - else + catch (Exception ex) { - var result = await ApiClient.ChatConversationAsync(request); - _questionAndAnswerMap[_currentQuestion] = result.Response; + _questionAndAnswerMap[_currentQuestion] = new ChatAppResponseOrError( + Array.Empty(), + ex.Message); } if (_questionAndAnswerMap[_currentQuestion]?.Error is null) diff --git a/app/SharedWebComponents/Services/ApiClient.cs b/app/SharedWebComponents/Services/ApiClient.cs index 2fcfb37d..3d3c1198 100644 --- a/app/SharedWebComponents/Services/ApiClient.cs +++ b/app/SharedWebComponents/Services/ApiClient.cs @@ -90,50 +90,6 @@ public async IAsyncEnumerable GetDocumentsAsync( } } - public Task> ChatConversationAsync(ChatRequest request) => PostRequestAsync(request, "api/chat"); - - private async Task> PostRequestAsync( - TRequest request, string apiRoute) where TRequest : ApproachRequest - { - var result = new AnswerResult( - IsSuccessful: false, - Response: null, - Approach: request.Approach, - Request: request); - - var json = JsonSerializer.Serialize( - request, - SerializerOptions.Default); - - using var body = new StringContent( - json, Encoding.UTF8, "application/json"); - - var response = await httpClient.PostAsync(apiRoute, body); - - if (response.IsSuccessStatusCode) - { - var answer = await response.Content.ReadFromJsonAsync(); - return result with - { - IsSuccessful = answer is not null, - Response = answer, - }; - } - else - { - var errorTitle = $"HTTP {(int)response.StatusCode} : {response.ReasonPhrase ?? "☹️ Unknown error..."}"; - var answer = new ChatAppResponseOrError( - Array.Empty(), - errorTitle); - - return result with - { - IsSuccessful = false, - Response = answer - }; - } - } - public async Task> PostStreamingRequestAsync( TRequest request, string apiRoute) where TRequest : ApproachRequest { @@ -141,14 +97,22 @@ public async Task> PostStreamingRequestAsync(); + var stream = await response.Content.ReadAsStreamAsync(); + var nullableResponses = JsonSerializer.DeserializeAsyncEnumerable( + stream, + new JsonSerializerOptions { PropertyNameCaseInsensitive = true }); + return nullableResponses.Where(r => r != null)!; } diff --git a/app/backend/Extensions/WebApplicationExtensions.cs b/app/backend/Extensions/WebApplicationExtensions.cs index 73e648e1..093cf80a 100644 --- a/app/backend/Extensions/WebApplicationExtensions.cs +++ b/app/backend/Extensions/WebApplicationExtensions.cs @@ -12,7 +12,7 @@ internal static WebApplication MapApi(this WebApplication app) api.MapPost("openai/chat", OnPostChatPromptAsync); // Long-form chat w/ contextual history endpoint - api.MapPost("chat", OnPostChatAsync); + api.MapPost("chat/stream", OnPostChatStreamingAsync); // Upload a document api.MapPost("documents", OnPostDocumentAsync); @@ -25,9 +25,6 @@ internal static WebApplication MapApi(this WebApplication app) api.MapGet("enableLogout", OnGetEnableLogout); - // Add streaming chat endpoint - api.MapPost("chat/stream", OnPostChatStreamingAsync); - return app; } @@ -73,22 +70,6 @@ You will always reply with a Markdown formatted response. } } - private static async Task OnPostChatAsync( - ChatRequest request, - ReadRetrieveReadChatService chatService, - CancellationToken cancellationToken) - { - if (request is { History.Length: > 0 }) - { - var response = await chatService.ReplyAsync( - request.History, request.Overrides, cancellationToken); - - return TypedResults.Ok(response); - } - - return Results.BadRequest(); - } - private static async IAsyncEnumerable OnPostChatStreamingAsync( ChatRequest request, ReadRetrieveReadChatService chatService, diff --git a/app/backend/Services/ReadRetrieveReadChatService.cs b/app/backend/Services/ReadRetrieveReadChatService.cs index ff238cbe..f512ef2e 100644 --- a/app/backend/Services/ReadRetrieveReadChatService.cs +++ b/app/backend/Services/ReadRetrieveReadChatService.cs @@ -57,10 +57,10 @@ public ReadRetrieveReadChatService( _tokenCredential = tokenCredential; } - public async Task ReplyAsync( + public async IAsyncEnumerable ReplyStreamingAsync( ChatMessage[] history, RequestOverrides? overrides, - CancellationToken cancellationToken = default) + [EnumeratorCancellation] CancellationToken cancellationToken = default) { var top = overrides?.Top ?? 3; var useSemanticCaptions = overrides?.SemanticCaptions ?? false; @@ -72,9 +72,8 @@ public async Task ReplyAsync( float[]? embeddings = null; var question = history.LastOrDefault(m => m.IsUser)?.Content is { } userQuestion ? userQuestion - : throw new InvalidOperationException("Use question is null"); + : throw new InvalidOperationException("User question is null"); - string[]? followUpQuestionList = null; if (overrides?.RetrievalMode != RetrievalMode.Text && embedding is not null) { embeddings = (await embedding.GenerateEmbeddingAsync(question, cancellationToken: cancellationToken)).ToArray(); @@ -93,11 +92,20 @@ standard plan AND dental AND employee benefit. "); getQueryChat.AddUserMessage(question); - var result = await chat.GetChatMessageContentAsync( + var queryBuilder = new StringBuilder(); + + await foreach (var content in chat.GetStreamingChatMessageContentsAsync( getQueryChat, - cancellationToken: cancellationToken); + kernel: _kernel, + cancellationToken: cancellationToken)) + { + if (content.Content is { Length: > 0 }) + { + queryBuilder.Append(content.Content); + } + } - query = result.Content ?? throw new InvalidOperationException("Failed to get search query"); + query = queryBuilder.ToString() ?? throw new InvalidOperationException("Failed to get search query"); } // step 2 @@ -171,186 +179,6 @@ Your answer needs to be a json object with answer and thoughts field. {documentContents} ## End ## -You answer needs to be a json object with the following format. -{{ - ""answer"": // the answer to the question, add a source reference to the end of each sentence. e.g. Apple is a fruit [reference1.pdf][reference2.pdf]. If no source available, put the answer as I don't know. - ""thoughts"": // brief thoughts on how you came up with the answer, e.g. what sources you used, what you thought about, etc. -}}"; - answerChat.AddUserMessage(prompt); - } - - var promptExecutingSetting = new OpenAIPromptExecutionSettings - { - MaxTokens = 1024, - Temperature = overrides?.Temperature ?? 0.7, - StopSequences = [], - }; - - // get answer - var answer = await chat.GetChatMessageContentAsync( - answerChat, - promptExecutingSetting, - cancellationToken: cancellationToken); - var answerJson = answer.Content ?? throw new InvalidOperationException("Failed to get search query"); - var answerObject = JsonSerializer.Deserialize(answerJson); - var ans = answerObject.GetProperty("answer").GetString() ?? throw new InvalidOperationException("Failed to get answer"); - var thoughts = answerObject.GetProperty("thoughts").GetString() ?? throw new InvalidOperationException("Failed to get thoughts"); - - // step 4 - // add follow up questions if requested - if (overrides?.SuggestFollowupQuestions is true) - { - var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); - followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated. -# Answer -{ans} - -# Format of the response -Return the follow-up question as a json string list. Don't put your answer between ```json and ```, return the json string directly. -e.g. -[ - ""What is the deductible?"", - ""What is the co-pay?"", - ""What is the out-of-pocket maximum?"" -]"); - - var followUpQuestions = await chat.GetChatMessageContentAsync( - followUpQuestionChat, - promptExecutingSetting, - cancellationToken: cancellationToken); - - var followUpQuestionsJson = followUpQuestions.Content ?? throw new InvalidOperationException("Failed to get search query"); - var followUpQuestionsObject = JsonSerializer.Deserialize(followUpQuestionsJson); - var followUpQuestionsList = followUpQuestionsObject.EnumerateArray().Select(x => x.GetString()!).ToList(); - foreach (var followUpQuestion in followUpQuestionsList) - { - ans += $" <<{followUpQuestion}>> "; - } - - followUpQuestionList = followUpQuestionsList.ToArray(); - } - - var responseMessage = new ResponseMessage("assistant", ans); - var responseContext = new ResponseContext( - DataPointsContent: documentContentList.Select(x => new SupportingContentRecord(x.Title, x.Content)).ToArray(), - DataPointsImages: images?.Select(x => new SupportingImageRecord(x.Title, x.Url)).ToArray(), - FollowupQuestions: followUpQuestionList ?? Array.Empty(), - Thoughts: new[] { new Thoughts("Thoughts", thoughts) }); - - var choice = new ResponseChoice( - Index: 0, - Message: responseMessage, - Context: responseContext, - CitationBaseUrl: _configuration.ToCitationBaseUrl()); - - return new ChatAppResponse(new[] { choice }); - } - - public async IAsyncEnumerable ReplyStreamingAsync( - ChatMessage[] history, - RequestOverrides? overrides, - [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - var chat = _kernel.GetRequiredService(); - var embedding = _kernel.GetRequiredService(); - float[]? embeddings = null; - var question = history.LastOrDefault(m => m.IsUser)?.Content is { } userQuestion - ? userQuestion - : throw new InvalidOperationException("User question is null"); - - // Generate embeddings if needed - if (overrides?.RetrievalMode != RetrievalMode.Text && embedding is not null) - { - embeddings = (await embedding.GenerateEmbeddingAsync(question, cancellationToken: cancellationToken)).ToArray(); - } - - // Get search query - string? query = null; - if (overrides?.RetrievalMode != RetrievalMode.Vector) - { - var getQueryChat = new ChatHistory(@"You are a helpful AI assistant, generate search query for followup question. -Make your respond simple and precise. Return the query only, do not return any other text. -e.g. -Northwind Health Plus AND standard plan. -standard plan AND dental AND employee benefit. -"); - - getQueryChat.AddUserMessage(question); - var queryBuilder = new StringBuilder(); - - await foreach (var content in chat.GetStreamingChatMessageContentsAsync( - getQueryChat, - kernel: _kernel, - cancellationToken: cancellationToken)) - { - if (content.Content is { Length: > 0 }) - { - queryBuilder.Append(content.Content); - } - } - - query = queryBuilder.ToString() ?? throw new InvalidOperationException("Failed to get search query"); - } - - // Search related documents - var documentContentList = await _searchClient.QueryDocumentsAsync(query, embeddings, overrides, cancellationToken); - string documentContents = documentContentList.Length == 0 - ? "no source available." - : string.Join("\r", documentContentList.Select(x => $"{x.Title}:{x.Content}")); - - // Get images if vision service available - SupportingImageRecord[]? images = default; - if (_visionService is not null) - { - var queryEmbeddings = await _visionService.VectorizeTextAsync(query ?? question, cancellationToken); - images = await _searchClient.QueryImagesAsync(query, queryEmbeddings.vector, overrides, cancellationToken); - } - - // Prepare chat history - var answerChat = new ChatHistory( - "You are a system assistant who helps the company employees with their questions. Be brief in your answers"); - - foreach (var message in history) - { - if (message.IsUser) - { - answerChat.AddUserMessage(message.Content); - } - else - { - answerChat.AddAssistantMessage(message.Content); - } - } - - // Add final prompt with context - if (images != null) - { - var prompt = @$"## Source ## -{documentContents} -## End ## - -Answer question based on available source and images. -Your answer needs to be a json object with answer and thoughts field. -Don't put your answer between ```json and ```, return the json string directly. e.g {{""answer"": ""I don't know"", ""thoughts"": ""I don't know""}}"; - - var tokenRequestContext = new TokenRequestContext(new[] { "https://storage.azure.com/.default" }); - var sasToken = await (_tokenCredential?.GetTokenAsync(tokenRequestContext, cancellationToken) ?? throw new InvalidOperationException("Failed to get token")); - var imageUrls = images.Select(x => $"{x.Url}?{sasToken.Token}").ToArray(); - var collection = new ChatMessageContentItemCollection(); - collection.Add(new TextContent(prompt)); - foreach (var imageUrl in imageUrls) - { - collection.Add(new ImageContent(new Uri(imageUrl))); - } - - answerChat.AddUserMessage(collection); - } - else - { - var prompt = @$" ## Source ## -{documentContents} -## End ## - You answer needs to be a json object with the following format. {{ ""answer"": // the answer to the question, add a source reference to the end of each sentence. e.g. Apple is a fruit [reference1.pdf][reference2.pdf]. If no source available, put the answer as I don't know. @@ -373,7 +201,7 @@ You answer needs to be a json object with the following format. FollowupQuestions: Array.Empty(), // Will be populated after full response Thoughts: Array.Empty()); // Will be populated after full response - // Stream the response + // get answer await foreach (var content in chat.GetStreamingChatMessageContentsAsync( answerChat, executionSettings: promptExecutingSetting, @@ -383,132 +211,78 @@ You answer needs to be a json object with the following format. if (content.Content is { Length: > 0 }) { streamingResponse.Append(content.Content); + var responseMessage = new ResponseMessage("assistant", streamingResponse.ToString()); + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: documentContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); + - ChatAppResponse response; - try - { - // Try parse as JSON to extract answer and thoughts - var currentJson = streamingResponse.ToString(); - var answerObject = JsonSerializer.Deserialize(currentJson); - var answer = answerObject.GetProperty("answer").GetString() ?? ""; - var thoughts = answerObject.TryGetProperty("thoughts", out var thoughtsProp) - ? thoughtsProp.GetString() - : ""; - - var responseMessage = new ResponseMessage("assistant", answer); - var updatedContext = documentContext with - { - Thoughts = !string.IsNullOrEmpty(thoughts) - ? new[] { new Thoughts("Thoughts", thoughts!) } - : Array.Empty() - }; - - var choice = new ResponseChoice( - Index: 0, - Message: responseMessage, - Context: updatedContext, - CitationBaseUrl: _configuration.ToCitationBaseUrl()); - - response = new ChatAppResponse(new[] { choice }); - } - catch (JsonException) - { - // If JSON parsing fails, return raw content - var responseMessage = new ResponseMessage("assistant", streamingResponse.ToString()); - var choice = new ResponseChoice( - Index: 0, - Message: responseMessage, - Context: documentContext, - CitationBaseUrl: _configuration.ToCitationBaseUrl()); - - response = new ChatAppResponse(new[] { choice }); - } - - yield return response; + yield return new ChatAppResponse(new[] { choice }); } } - // After streaming complete, add follow-up questions if requested + // After streaming completes, parse the final answer + var answerJson = streamingResponse.ToString(); + var finalAnswerObject = JsonSerializer.Deserialize(answerJson); + var ans = finalAnswerObject.GetProperty("answer").GetString() ?? throw new InvalidOperationException("Failed to get answer"); + var finalThoughts = finalAnswerObject.GetProperty("thoughts").GetString() ?? throw new InvalidOperationException("Failed to get thoughts"); + + // Create response context that will be used throughout + var responseContext = new ResponseContext( + DataPointsContent: documentContentList.Select(x => new SupportingContentRecord(x.Title, x.Content)).ToArray(), + DataPointsImages: images?.Select(x => new SupportingImageRecord(x.Title, x.Url)).ToArray(), + FollowupQuestions: Array.Empty(), + Thoughts: new[] { new Thoughts("Thoughts", finalThoughts) }); + + // step 4 + // add follow up questions if requested if (overrides?.SuggestFollowupQuestions is true) { - ChatAppResponse response; - var finalAnswer = streamingResponse.ToString(); - try - { - var answerObject = JsonSerializer.Deserialize(finalAnswer); - var answer = answerObject.GetProperty("answer").GetString() ?? ""; - var thoughts = answerObject.GetProperty("thoughts").GetString() ?? ""; - - // Inline the follow-up questions generation - var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); - followUpQuestionChat.AddUserMessage($@"Generate three follow-up question based on the answer you just generated. + var followUpQuestionChat = new ChatHistory(@"You are a helpful AI assistant"); + followUpQuestionChat.AddUserMessage($@"Generate three follow-up questions based on the answer you just generated. # Answer -{answer} +{ans} # Format of the response -Return the follow-up question as a json string list. Don't put your answer between ```json and ```, return the json string directly. -e.g. -[ - ""What is the deductible?"", - ""What is the co-pay?"", - ""What is the out-of-pocket maximum?"" -]"); - - var followUpBuilder = new StringBuilder(); - await foreach (var content in chat.GetStreamingChatMessageContentsAsync( - followUpQuestionChat, - executionSettings: promptExecutingSetting, - kernel: _kernel, - cancellationToken: cancellationToken)) +Generate three questions, one per line. Do not include any JSON formatting or other text. +For example: +What is the deductible? +What is the co-pay? +What is the out-of-pocket maximum?"); + + var followUpQuestions = new List(); + var followUpBuilder = new StringBuilder(); + await foreach (var content in chat.GetStreamingChatMessageContentsAsync( + followUpQuestionChat, + executionSettings: promptExecutingSetting, + kernel: _kernel, + cancellationToken: cancellationToken)) + { + if (content.Content is { Length: > 0 }) { - if (content.Content is { Length: > 0 }) + followUpBuilder.Append(content.Content); + var questions = followUpBuilder.ToString().Split('\n', StringSplitOptions.RemoveEmptyEntries); + + var answerWithQuestions = ans; + foreach (var followUpQuestion in questions) { - followUpBuilder.Append(content.Content); + answerWithQuestions += $" <<{followUpQuestion.Trim()}>> "; } - } - - var followUpQuestionsJson = followUpBuilder.ToString() ?? throw new InvalidOperationException("Failed to get follow-up questions"); - var followUpQuestionsObject = JsonSerializer.Deserialize(followUpQuestionsJson); - var followUpQuestionsList = followUpQuestionsObject.EnumerateArray() - .Select(x => x.GetString()!) - .ToArray(); - - // Add follow-up questions to the answer text - var answerWithQuestions = answer; - foreach (var followUpQuestion in followUpQuestionsList) - { - answerWithQuestions += $" <<{followUpQuestion}>> "; - } - - var responseMessage = new ResponseMessage("assistant", answerWithQuestions); - var finalContext = documentContext with - { - Thoughts = new[] { new Thoughts("Thoughts", thoughts) }, - FollowupQuestions = followUpQuestionsList - }; - var choice = new ResponseChoice( - Index: 0, - Message: responseMessage, - Context: finalContext, - CitationBaseUrl: _configuration.ToCitationBaseUrl()); + var responseMessage = new ResponseMessage("assistant", answerWithQuestions); + var updatedContext = responseContext with { FollowupQuestions = questions }; - response = new ChatAppResponse(new[] { choice }); - } - catch (JsonException) - { - // If JSON parsing fails, return raw content - var responseMessage = new ResponseMessage("assistant", streamingResponse.ToString()); - var choice = new ResponseChoice( - Index: 0, - Message: responseMessage, - Context: documentContext, - CitationBaseUrl: _configuration.ToCitationBaseUrl()); + var choice = new ResponseChoice( + Index: 0, + Message: responseMessage, + Context: updatedContext, + CitationBaseUrl: _configuration.ToCitationBaseUrl()); - response = new ChatAppResponse(new[] { choice }); + yield return new ChatAppResponse(new[] { choice }); + } } - - yield return response; } } }