From 1f567f38f23288c108a791bed5be9070ca3c285d Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Fri, 9 Aug 2024 18:54:05 +0100 Subject: [PATCH 01/20] Improving the ITextToImageService to support ExecutionSettings, ImageContent and TextContent --- .../TextToImage/OpenAI_TextToImageDalle3.cs | 19 +++ .../AzureOpenAITextToImageService.cs | 73 ++++++++ .../OpenAITextToImageExecutionSettings.cs | 159 ++++++++++++++++++ .../TextToImage/OpenAITextToImageService.cs | 6 + .../AI/TextToImage/ITextToImageService.cs | 16 ++ 5 files changed, 273 insertions(+) create mode 100644 dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs index 32e78c9382a8..bf09f9c7687a 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs @@ -4,6 +4,7 @@ using Microsoft.Extensions.Http.Resilience; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.TextToImage; namespace TextToImage; @@ -78,6 +79,24 @@ A cute baby sea otter */ } + [Fact] + public async Task SimpleTextToImageExampleAsync() + { + var builder = Kernel.CreateBuilder() + .AddAzureOpenAITextToImage( // Add your text to image service + deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName, + endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint, + apiKey: TestConfiguration.AzureOpenAI.ImageApiKey, + modelId: TestConfiguration.AzureOpenAI.ImageModelId); + + var kernel = builder.Build(); + var service = kernel.GetRequiredService(); + + var generatedImages = await service.GetImageContentsAsync(new TextContent("A cute baby sea otter"), new OpenAITextToImageExecutionSettings { Width = 1024, Height = 1024 }); + 
+ this.Output.WriteLine(generatedImages[0].Uri!.ToString()); + } + [Fact(Skip = "Generating the Image can take too long and often break the test")] public async Task AzureOpenAIDallEAsync() { diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs index efa3ffcc87c0..a1abd49388cd 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs @@ -209,4 +209,77 @@ internal void AddAttribute(string key, string? value) this._attributes.Add(key, value); } } + + /// + public async Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) + { + var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); + + Verify.NotNull(input); + + var size = (imageSettings.Width, imageSettings.Height) switch + { + (256, 256) => ImageSize.Size256x256, + (512, 512) => ImageSize.Size512x512, + (1024, 1024) => ImageSize.Size1024x1024, + (1792, 1024) => ImageSize.Size1792x1024, + (1024, 1792) => ImageSize.Size1024x1792, + _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}") + }; + + Response imageGenerations; + try + { + var options = new ImageGenerationOptions + { + DeploymentName = this._deploymentName, + ImageCount = imageSettings.ImageCount, + Prompt = input.Text, + Size = size, + }; + + if (imageSettings.Quality is not null) + { + options.Quality = imageSettings.Quality; + } + if (imageSettings.Style is not null) + { + options.Style = imageSettings.Style; + } + + imageGenerations = await this._client.GetImageGenerationsAsync(options, cancellationToken).ConfigureAwait(false); + } + catch (RequestFailedException e) + { + 
throw e.ToHttpOperationException(); + } + + if (!imageGenerations.HasValue) + { + throw new KernelException("The response does not contain an image result"); + } + + if (imageGenerations.Value.Data.Count == 0) + { + throw new KernelException("The response does not contain any image"); + } + + List images = []; + foreach (var image in imageGenerations.Value.Data) + { + if (image.Url is not null) + { + images.Add(new ImageContent(image.Url)); + } + else if (image.Base64Data is not null) + { + images.Add(new ImageContent($"data:;base64,{image.Base64Data}")); + } + else + { + throw new NotSupportedException("Image is neither an URL nor a base64 data"); + } + } + return images; + } } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs new file mode 100644 index 000000000000..4c58c9e76614 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.SemanticKernel.Text; + +namespace Microsoft.SemanticKernel.Connectors.OpenAI; + +/// +/// Text to image execution settings for an OpenAI image generation request. +/// +[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] +public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings +{ + private const int DefaultWidth = 1024; + private const int DefaultHeight = 1024; + + /// + /// Initializes a new instance of the class. + /// + public OpenAITextToImageExecutionSettings() + { + this.Width = DefaultWidth; + this.Height = DefaultHeight; + } + /// + /// Width of the generated image. 
+ /// + public int Width + { + get => this._width; + + set + { + this.ThrowIfFrozen(); + this._width = value; + } + } + + /// + /// The quality of the image that will be generated. + /// `hd` creates images with finer details and greater consistency across the image. + /// This param is only supported for dall-e-3. + /// + public string? Quality + { + get => this._quality; + + set + { + this.ThrowIfFrozen(); + this._quality = value; + } + } + + /// + /// The number of images to generate. Must be between 1 and 10. + /// For dall-e-3, only ImageCount = 1 is supported. + /// + public int? ImageCount + { + get => this._imageCount; + + set + { + this.ThrowIfFrozen(); + this._imageCount = value; + } + } + + /// + /// The style of the generated images. Must be one of vivid or natural. + /// Vivid causes the model to lean towards generating hyper-real and dramatic images. + /// Natural causes the model to produce more natural, less hyper-real looking images. + /// This param is only supported for dall-e-3. + /// + public string? Style + { + get => this._style; + + set + { + this.ThrowIfFrozen(); + this._style = value; + } + } + + /// + /// Height of the generated image. + /// + public int Height + { + get => this._height; + + set + { + this.ThrowIfFrozen(); + this._height = value; + } + } + + /// + public override void Freeze() + { + if (this.IsFrozen) + { + return; + } + + base.Freeze(); + } + + /// + public override PromptExecutionSettings Clone() + { + return new OpenAITextToImageExecutionSettings() + { + ModelId = this.ModelId, + ExtensionData = this.ExtensionData is not null ? new Dictionary(this.ExtensionData) : null, + Width = this.Width, + Height = this.Height, + }; + } + + /// + /// Create a new settings object with the values from another settings object. + /// + /// Template configuration + /// Default max tokens + /// An instance of OpenAIPromptExecutionSettings + public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? 
executionSettings, int? defaultMaxTokens = null) + { + if (executionSettings is null) + { + return new OpenAITextToImageExecutionSettings(); + } + + if (executionSettings is OpenAITextToImageExecutionSettings settings) + { + return settings; + } + + var json = JsonSerializer.Serialize(executionSettings); + + var openAIExecutionSettings = JsonSerializer.Deserialize(json, JsonOptionsCache.ReadPermissive); + return openAIExecutionSettings!; + } + + #region private ================================================================================ + + private int _width; + private int _height; + private int? _imageCount; + private string? _quality; + private string? _style; + + #endregion +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs index 335fe8cad5ee..ea6420fcfccb 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs @@ -93,6 +93,12 @@ public Task GenerateImageAsync(string description, int width, int height return this.GenerateImageAsync(this._modelId, description, width, height, "url", x => x.Url, cancellationToken); } + /// + public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) + { + throw new NotImplementedException(); + } + private async Task GenerateImageAsync( string? 
model, string description, diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs index c4c967445a6b..7919f04bb14a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Threading; using System.Threading.Tasks; @@ -29,4 +30,19 @@ public Task GenerateImageAsync( int height, Kernel? kernel = null, CancellationToken cancellationToken = default); + + /// + /// Generate images matching the given text description + /// + /// Input text for image generation + /// Text to image execution settings + /// The containing services, plugins, and other state for use throughout the operation. + /// The to monitor for cancellation requests. The default is . + /// Generated image contents + [Experimental("SKEXP0001")] + public Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default); } From 201423b0c1298ffec9aec62c54f0e56568539471 Mon Sep 17 00:00:00 2001 From: aghimir3 <22482815+aghimir3@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:49:34 -0700 Subject: [PATCH 02/20] Add quality and style to TextToImageRequest --- .../TextToImage/TextToImageRequest.cs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs index 70b5ac5418ee..704f973c29b5 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs @@ -39,4 +39,16 @@ internal sealed class TextToImageRequest /// [JsonPropertyName("response_format")] public string Format { get; set; } = "url"; + + /// + /// Image quality, "standard" or "hd" + /// + [JsonPropertyName("quality")] + public string Quality { get; set; } = "standard"; + + /// + /// Image style, "vivid" or "natural" + /// + [JsonPropertyName("style")] + public string Style { get; set; } = "vivid"; } From 6895c051be0fc91616d3acdc7a3b5598e563f6e6 Mon Sep 17 00:00:00 2001 From: aghimir3 <22482815+aghimir3@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:46:23 -0700 Subject: [PATCH 03/20] Implement GetImageContentsAsync in OpenAITextToImageService - Added the GetImageContentsAsync method to the OpenAITextToImageService class. - Implemented validation for input, including width, height, quality, and style settings. - Supported image sizes include 256x256, 512x512, 1024x1024, 1792x1024, and 1024x1792. - Added checks for supported qualities ('standard', 'hd') and styles ('vivid', 'natural'). - Constructed the request body for image generation and processed the response to handle both URLs and base64-encoded images. 
- Converted image strings into ImageContent objects, ensuring proper handling of data URIs and HTTP URLs. --- .../TextToImage/OpenAITextToImageService.cs | 68 ++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs index ea6420fcfccb..6e2be2425abf 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; +using System.Linq; using System.Net.Http; using System.Text.Json; using System.Threading; @@ -94,9 +95,72 @@ public Task GenerateImageAsync(string description, int width, int height } /// - public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) + public async Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default) { - throw new NotImplementedException(); + // Ensure the input is valid + Verify.NotNull(input); + + // Convert the generic execution settings to OpenAI-specific settings + var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); + + // Determine the size of the image based on the width and height settings + var size = (imageSettings.Width, imageSettings.Height) switch + { + (256, 256) => "256x256", + (512, 512) => "512x512", + (1024, 1024) => "1024x1024", + (1792, 1024) => "1792x1024", + (1024, 1792) => "1024x1792", + _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}") + }; + + // Validate quality and style + var supportedQualities = new[] { "standard", "hd" }; + var supportedStyles = new[] { "vivid", "natural" }; + + if (!string.IsNullOrEmpty(imageSettings.Quality) && !supportedQualities.Contains(imageSettings.Quality)) + { + throw new NotSupportedException($"The provided quality '{imageSettings.Quality}' is not supported."); + } + + if (!string.IsNullOrEmpty(imageSettings.Style) && !supportedStyles.Contains(imageSettings.Style)) + { + throw new NotSupportedException($"The provided style '{imageSettings.Style}' is not supported."); + } + + // Create the request body for the image generation + var requestBody = JsonSerializer.Serialize(new TextToImageRequest + { + Model = imageSettings.ModelId ?? this._modelId, + Prompt = input.Text ?? string.Empty, + Size = size, + Count = imageSettings.ImageCount ?? 1, + Quality = imageSettings.Quality ?? "standard", + Style = imageSettings.Style ?? "vivid" + }); + + // Execute the request using the core client and return Image objects + var imageStrings = await this._core.ExecuteImageGenerationRequestAsync(OpenAIEndpoint, requestBody, x => x.Url ?? 
x.AsBase64, cancellationToken).ConfigureAwait(false); + + // Convert the strings to ImageContent objects + var images = new List(); + foreach (var imageString in imageStrings) + { + if (Uri.TryCreate(imageString, UriKind.Absolute, out var uriResult) && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps)) + { + images.Add(new ImageContent(uriResult)); + } + else + { + images.Add(new ImageContent($"data:;base64,{imageString}")); + } + } + + return images.AsReadOnly(); } private async Task GenerateImageAsync( From ef12678b49a71006fc773f56fd0168ed93ccbd08 Mon Sep 17 00:00:00 2001 From: aghimir3 <22482815+aghimir3@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:48:02 -0700 Subject: [PATCH 04/20] Add unit tests for GetImageContentsAsync method - Implemented unit tests for the GetImageContentsAsync method in OpenAITextToImageService. - Added a test to verify that the method returns expected ImageContent when provided with valid input. - Added parameterized tests using [Theory] and [InlineData] to cover a variety of scenarios: - Valid URL and base64 image data inputs. - Validation of input sizes, quality, and style parameters. - Ensured NotSupportedException is thrown for unsupported sizes, quality, and style. - Tests ensure that both HTTP URLs and base64-encoded images are handled correctly, with proper assertions on the returned ImageContent objects. --- .../OpenAITextToImageServiceTests.cs | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/dotnet/src/Connectors/Connectors.UnitTests/OpenAI/TextToImage/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/OpenAI/TextToImage/OpenAITextToImageServiceTests.cs index 1f31ec076edd..8855a233b27f 100644 --- a/dotnet/src/Connectors/Connectors.UnitTests/OpenAI/TextToImage/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.UnitTests/OpenAI/TextToImage/OpenAITextToImageServiceTests.cs @@ -1,10 +1,12 @@ // Copyright (c) Microsoft. 
All rights reserved. using System; +using System.Linq; using System.Net.Http; using System.Text; using System.Threading.Tasks; using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Connectors.OpenAI; using Moq; using Xunit; @@ -81,6 +83,122 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, bool e } } + [Fact] + public async Task GetImageContentsAsyncWithValidInputReturnsImageContentsAsync() + { + // Arrange + var service = new OpenAITextToImageService("api-key", "organization", "dall-e-3", this._httpClient); + Assert.Equal("dall-e-3", service.Attributes["ModelId"]); + + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(""" + { + "created": 1702575371, + "data": [ + { + "url": "https://image-url" + } + ] + } + """, Encoding.UTF8, "application/json") + }; + + var input = new TextContent("A cute baby sea otter"); + var executionSettings = new OpenAITextToImageExecutionSettings + { + Width = 1024, + Height = 1024, + Quality = "hd", + Style = "natural", + ImageCount = 1 + }; + + // Act + var result = await service.GetImageContentsAsync(input, executionSettings); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + Assert.Equal(new Uri("https://image-url"), result[0].Uri); + } + + [Theory] + [InlineData(1024, 1024, "hd", "natural", 1, "https://image-url", false)] + [InlineData(123, 456, "hd", "natural", 1, "", true)] + [InlineData(1024, 1024, "hd", "natural", 2, "https://image-url1|https://image-url2", false)] + [InlineData(1024, 1024, "ultra", "natural", 1, "", true)] + [InlineData(1024, 1024, "hd", "artistic", 1, "", true)] + public async Task GetImageContentsReturnsExpectedResultsAsync( + int width, + int height, + string quality, + string style, + int imageCount, + string expectedUrls, + bool expectException) + { + // Arrange + var service = new OpenAITextToImageService("api-key", "organization", 
"dall-e-3", this._httpClient); + + if (!expectException) + { + var urls = expectedUrls.Split('|').Select(url => + { + return url.StartsWith("http", StringComparison.OrdinalIgnoreCase) ? + $"{{ \"url\": \"{url}\" }}" : + $"{{ \"b64_json\": \"{url}\" }}"; + }); + var jsonResponse = $"{{ \"created\": 1702575371, \"data\": [ {string.Join(",", urls)} ] }}"; + + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(jsonResponse, Encoding.UTF8, "application/json") + }; + } + + var input = new TextContent("A picturesque landscape"); + var executionSettings = new OpenAITextToImageExecutionSettings + { + Width = width, + Height = height, + Quality = quality, + Style = style, + ImageCount = imageCount + }; + + // Act & Assert + if (expectException) + { + await Assert.ThrowsAsync(async () => + { + await service.GetImageContentsAsync(input, executionSettings); + }); + } + else + { + var result = await service.GetImageContentsAsync(input, executionSettings); + + Assert.NotNull(result); + Assert.Equal(imageCount, result.Count); + + var expectedUrlList = expectedUrls.Split('|').ToList(); + for (int i = 0; i < result.Count; i++) + { + if (Uri.TryCreate(expectedUrlList[i], UriKind.Absolute, out var uriResult) && + (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps)) + { + Assert.Equal(uriResult, result[i].Uri); + } + else + { + Assert.StartsWith("data:;base64,", result[i].DataUri); + Assert.Contains(expectedUrlList[i], result[i].DataUri); + } + } + } + } + public void Dispose() { this._httpClient.Dispose(); From 36d4fb9e608a85edc6c265ac26953c0eb13d0cc0 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:28:17 +0100 Subject: [PATCH 05/20] Adding support for Text-To-Image Settings --- .../AzureOpenAITextToImageServiceTests.cs | 192 +++++++++++- ...ext-to-image-b64_json-format-response.json | 9 + 
.../TestData/text-to-image-response.json} | 1 + .../TestData/text-to-image-response.txt | 9 - .../Services/AzureOpenAITextToImageService.cs | 18 +- .../Connectors.OpenAI.UnitTests.csproj | 5 +- .../Services/OpenAITextToImageServiceTests.cs | 185 +++++++++++- ...ext-to-image-b64_json-format-response.json | 9 + .../TestData/text-to-image-response.json | 9 + .../Connectors.OpenAI.csproj | 4 + .../Core/ClientCore.TextToImage.cs | 111 +++++++ .../Services/OpenAITextToImageService.cs | 15 +- .../OpenAITextToImageExecutionSettings.cs | 66 ++-- .../AzureOpenAITextToImageService.cs | 285 ------------------ .../TextToImage/OpenAITextToImageService.cs | 187 ------------ .../Contents/TextContent.cs | 9 + 16 files changed, 584 insertions(+), 530 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json rename dotnet/src/Connectors/{Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt => Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json} (71%) delete mode 100644 dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt create mode 100644 dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json create mode 100644 dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json rename dotnet/src/Connectors/Connectors.OpenAI/{TextToImage => Settings}/OpenAITextToImageExecutionSettings.cs (75%) delete mode 100644 dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs delete mode 100644 dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs index 60aed7875b56..302ba338697d 100644 --- 
a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs @@ -2,16 +2,21 @@ using System; using System.IO; +using System.Net; using System.Net.Http; +using System.Text; using System.Text.Json; using System.Text.Json.Nodes; using System.Threading.Tasks; using Azure.AI.OpenAI; using Azure.Core; using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Connectors.AzureOpenAI; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; using Moq; +using OpenAI.Images; namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Services; @@ -30,7 +35,7 @@ public AzureOpenAITextToImageServiceTests() { ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) { - Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt")) + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json")) } }; this._httpClient = new HttpClient(this._messageHandlerStub, false); @@ -143,6 +148,191 @@ public void ItShouldThrowExceptionIfNoEndpointProvided(bool useTokeCredential, s } } + [Theory] + [InlineData(null, null)] + [InlineData("uri", "url")] + [InlineData("url", "url")] + [InlineData("GeneratedImage.Uri", "url")] + [InlineData("bytes", "b64_json")] + [InlineData("b64_json", "b64_json")] + [InlineData("GeneratedImage.Bytes", "b64_json")] + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? uri, string? expectedResponseFormat) + { + // Arrange + object? 
responseFormatObject = uri switch + { + "GeneratedImage.Uri" => GeneratedImageFormat.Uri, + "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, + _ => uri + }; + + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormatObject }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedResponseFormat is not null) + { + Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", requestBody); + } + else + { + // Then no response format is provided, it should not be included in the request body + Assert.DoesNotContain("response_format", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("hd", "hd")] + [InlineData("high", "hd")] + [InlineData("standard", "standard")] + public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? 
expectedQuality) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Quality = quality }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedQuality is not null) + { + Assert.Contains($"\"quality\":\"{expectedQuality}\"", requestBody); + } + else + { + // Then no quality is provided, it should not be included in the request body + Assert.DoesNotContain("quality", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("vivid", "vivid")] + [InlineData("natural", "natural")] + public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Style = style }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedStyle is not null) + { + Assert.Contains($"\"style\":\"{expectedStyle}\"", requestBody); + } + else + { + // Then no style is provided, it should not be included in the request body + Assert.DoesNotContain("style", requestBody); + } + } + + [Theory] + [InlineData(null, null, null)] + [InlineData(1, 2, "1x2")] + public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? 
width, int? height, string? expectedSize) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings + { + Size = width.HasValue && height.HasValue + ? (width.Value, height.Value) + : null + }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedSize is not null) + { + Assert.Contains($"\"size\":\"{expectedSize}\"", requestBody); + } + else + { + // Then no size is provided, it should not be included in the request body + Assert.DoesNotContain("size", requestBody); + } + } + + [Fact] + public async Task GetByteImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json")) + }; + + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.True(imageContent.CanRead); + Assert.Equal("image/png", imageContent.MimeType); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } 
+ + [Fact] + public async Task GetUrlImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "url" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.False(imageContent.CanRead); + Assert.Equal(new Uri("https://image-url/"), imageContent.Uri); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + public void Dispose() { this._httpClient.Dispose(); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json new file mode 100644 index 000000000000..e004607fa8f0 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json @@ -0,0 +1,9 @@ +{ + "created": 1726234481, + "data": [ + { + "b64_json": "iVBORw0KGgoAAA==", + "revised_prompt": "my prompt" + } + ] +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json similarity index 71% rename from dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt rename to dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json index 7d8f7327a5ec..8fd01a13c7ac 100644 --- 
a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json @@ -2,6 +2,7 @@ "created": 1702575371, "data": [ { + "revised_prompt": "my prompt", "url": "https://image-url/" } ] diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt deleted file mode 100644 index 1d6f2150b1d5..000000000000 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt +++ /dev/null @@ -1,9 +0,0 @@ -{ - "created": 1702575371, - "data": [ - { - "revised_prompt": "A photo capturing the diversity of the Earth's landscapes.", - "url": "https://image-url/" - } - ] -} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs index b066cc4b3e66..b11840a2cf1f 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs @@ -46,11 +46,8 @@ public AzureOpenAITextToImageService( { Verify.NotNullOrWhiteSpace(apiKey); - var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri; - if (connectorEndpoint is null) - { - throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - } + var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) + ?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. 
Please ensure at least one is provided."); var options = AzureClientCore.GetAzureOpenAIClientOptions( httpClient, @@ -87,11 +84,8 @@ public AzureOpenAITextToImageService( { Verify.NotNull(credential); - var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri; - if (connectorEndpoint is null) - { - throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - } + var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) + ?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); var options = AzureClientCore.GetAzureOpenAIClientOptions( httpClient, @@ -133,4 +127,8 @@ public AzureOpenAITextToImageService( /// public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) => this._client.GenerateImageAsync(this._client.DeploymentName, description, width, height, cancellationToken); + + /// + public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, CancellationToken cancellationToken = default) + => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj index e187080a2c35..68a194edff23 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj @@ -80,7 +80,10 @@ Always - + + Always + + Always diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs index 1528986b9064..856366548cc5 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs @@ -5,10 +5,14 @@ using System.Net.Http; using System.Threading.Tasks; using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; using Moq; using Xunit; +using OpenAI.Images; +using System.Text.Unicode; +using System.Text; namespace SemanticKernel.Connectors.OpenAI.UnitTests.Services; @@ -27,7 +31,7 @@ public OpenAITextToImageServiceTests() { ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) { - Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt")) + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json")) } }; this._httpClient = new HttpClient(this._messageHandlerStub, false); @@ -68,6 +72,185 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.Equal("https://image-url/", result); } + [Theory] + 
[InlineData(null, null)] + [InlineData("uri", "url")] + [InlineData("url", "url")] + [InlineData("GeneratedImage.Uri", "url")] + [InlineData("bytes", "b64_json")] + [InlineData("b64_json", "b64_json")] + [InlineData("GeneratedImage.Bytes", "b64_json")] + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? uri, string? expectedResponseFormat) + { + // Arrange + object? responseFormatObject = uri switch + { + "GeneratedImage.Uri" => GeneratedImageFormat.Uri, + "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, + _ => uri + }; + + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormatObject }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedResponseFormat is not null) + { + Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", requestBody); + } + else + { + // Then no response format is provided, it should not be included in the request body + Assert.DoesNotContain("response_format", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("hd", "hd")] + [InlineData("high", "hd")] + [InlineData("standard", "standard")] + public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? 
expectedQuality) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Quality = quality }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedQuality is not null) + { + Assert.Contains($"\"quality\":\"{expectedQuality}\"", requestBody); + } + else + { + // Then no quality is provided, it should not be included in the request body + Assert.DoesNotContain("quality", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("vivid", "vivid")] + [InlineData("natural", "natural")] + public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Style = style }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedStyle is not null) + { + Assert.Contains($"\"style\":\"{expectedStyle}\"", requestBody); + } + else + { + // Then no style is provided, it should not be included in the request body + Assert.DoesNotContain("style", requestBody); + } + } + + [Theory] + [InlineData(null, null, null)] + [InlineData(1, 2, "1x2")] + public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? width, int? height, string? 
expectedSize) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings + { + Size = width.HasValue && height.HasValue + ? (width.Value, height.Value) + : null + }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedSize is not null) + { + Assert.Contains($"\"size\":\"{expectedSize}\"", requestBody); + } + else + { + // Then no size is provided, it should not be included in the request body + Assert.DoesNotContain("size", requestBody); + } + } + + [Fact] + public async Task GetByteImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json")) + }; + + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.True(imageContent.CanRead); + Assert.Equal("image/png", imageContent.MimeType); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + + [Fact] + public async Task GetUrlImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new 
OpenAITextToImageExecutionSettings { ResponseFormat = "url" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.False(imageContent.CanRead); + Assert.Equal(new Uri("https://image-url/"), imageContent.Uri); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + public void Dispose() { this._httpClient.Dispose(); diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json new file mode 100644 index 000000000000..e004607fa8f0 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json @@ -0,0 +1,9 @@ +{ + "created": 1726234481, + "data": [ + { + "b64_json": "iVBORw0KGgoAAA==", + "revised_prompt": "my prompt" + } + ] +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json new file mode 100644 index 000000000000..db96aba8f869 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json @@ -0,0 +1,9 @@ +{ + "created": 1702575371, + "data": [ + { + "revised_prompt": "my prompt", + "url": "https://image-url/" + } + ] +} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj b/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj index 30b637922494..50c03bec279e 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj +++ b/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj @@ -38,4 +38,8 @@ + + + + diff --git 
a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs index 1cb9c5993eae..84c893af68bf 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.ClientModel; +using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; using OpenAI.Images; @@ -47,4 +49,113 @@ internal async Task GenerateImageAsync( return generatedImage.ImageUri?.ToString() ?? throw new KernelException("The generated image is not in url format"); } + + /// + /// Generates an image with the provided configuration. + /// + /// The input text content to generate the image + /// Execution settings for the image generation + /// Kernel instance + /// Cancellation token + /// List of image generated contents + internal async Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? kernel = null, + CancellationToken cancellationToken = default) + { + // Ensure the input is valid + Verify.NotNull(input); + + // Convert the generic execution settings to OpenAI-specific settings + var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); + + var imageGenerationOptions = new ImageGenerationOptions() + { + Size = GetGeneratedImageSize(imageSettings.Size), + ResponseFormat = GetResponseFormat(imageSettings.ResponseFormat), + Style = GetGeneratedImageStyle(imageSettings.Style), + Quality = GetGeneratedImageQuality(imageSettings.Quality), + EndUserId = imageSettings.EndUserId, + }; + + var targetModel = string.IsNullOrEmpty(imageSettings.ModelId) + ? "dall-e-2" // Defaults to the DALL-E 2 server-side - https://platform.openai.com/docs/api-reference/images/create#images-create-model. 
+ : imageSettings.ModelId; + + ClientResult response = await RunRequestAsync(() => this.Client!.GetImageClient(targetModel).GenerateImageAsync(input.Text, imageGenerationOptions, cancellationToken)).ConfigureAwait(false); + var generatedImage = response.Value; + + List result = []; + if (generatedImage.ImageUri is not null) + { + result.Add(new ImageContent(uri: generatedImage.ImageUri) { InnerContent = generatedImage }); + } + else + { + result.Add(new ImageContent(generatedImage.ImageBytes, "image/png") { InnerContent = generatedImage }); + } + + return result; + } + + private static GeneratedImageSize? GetGeneratedImageSize((int Width, int Height)? size) + => size is null + ? null + : new GeneratedImageSize(size.Value.Width, size.Value.Height); + + private static GeneratedImageQuality? GetGeneratedImageQuality(string? quality) + { + if (quality is null) + { + return null; + } + + return quality.ToUpperInvariant() switch + { + "STANDARD" => GeneratedImageQuality.Standard, + "HIGH" or "HD" => GeneratedImageQuality.High, + _ => throw new NotSupportedException($"The provided quality '{quality}' is not supported.") + }; + } + + private static GeneratedImageStyle? GetGeneratedImageStyle(string? style) + { + if (style is null) + { + return null; + } + + return style.ToUpperInvariant() switch + { + "VIVID" => GeneratedImageStyle.Vivid, + "NATURAL" => GeneratedImageStyle.Natural, + _ => throw new NotSupportedException($"The provided style '{style}' is not supported.") + }; + } + + private static GeneratedImageFormat? GetResponseFormat(object? 
responseFormat) + { + if (responseFormat is null) + { + return null; + } + + if (responseFormat is GeneratedImageFormat format) + { + return format; + } + + if (responseFormat is string formatString) + { + return formatString.ToUpperInvariant() switch + { + "URI" or "URL" => GeneratedImageFormat.Uri, + "BYTES" or "B64_JSON" => GeneratedImageFormat.Bytes, + _ => throw new NotSupportedException($"The provided response format '{formatString}' is not supported.") + }; + } + + throw new NotSupportedException($"The provided response format type '{responseFormat.GetType()}' is not supported."); + } } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs index f51e7d7c0141..79e6a96a5761 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs @@ -40,6 +40,19 @@ public OpenAITextToImageService( } /// - public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) + public Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? kernel = null, + CancellationToken cancellationToken = default) + => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); + + /// + public Task GenerateImageAsync( + string description, + int width, + int height, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default) => this._client.GenerateImageAsync(this._client.ModelId, description, width, height, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs similarity index 75% rename from dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs rename to dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs index 4c58c9e76614..39a61da72791 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs @@ -4,6 +4,7 @@ using System.Text.Json; using System.Text.Json.Serialization; using Microsoft.SemanticKernel.Text; +using OpenAI.Images; namespace Microsoft.SemanticKernel.Connectors.OpenAI; @@ -13,35 +14,29 @@ namespace Microsoft.SemanticKernel.Connectors.OpenAI; [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings { - private const int DefaultWidth = 1024; - private const int DefaultHeight = 1024; - /// /// Initializes a new instance of the class. /// public OpenAITextToImageExecutionSettings() { - this.Width = DefaultWidth; - this.Height = DefaultHeight; } /// - /// Width of the generated image. + /// Optional width and height of the generated image. /// - public int Width + public (int Width, int Height)? Size { - get => this._width; + get => this._size; set { this.ThrowIfFrozen(); - this._width = value; + this._size = value; } } /// - /// The quality of the image that will be generated. - /// `hd` creates images with finer details and greater consistency across the image. - /// This param is only supported for dall-e-3. + /// The quality of the image that will be generated. 
Defaults to "standard" + /// "hd" or "high" creates images with finer details and greater consistency. This param is only supported for dall-e-3. /// public string? Quality { @@ -55,48 +50,50 @@ public string? Quality } /// - /// The number of images to generate. Must be between 1 and 10. - /// For dall-e-3, only ImageCount = 1 is supported. + /// The style of the generated images. Must be one of vivid or natural. + /// Vivid causes the model to lean towards generating hyper-real and dramatic images. + /// Natural causes the model to produce more natural, less hyper-real looking images. + /// This param is only supported for dall-e-3. /// - public int? ImageCount + public string? Style { - get => this._imageCount; + get => this._style; set { this.ThrowIfFrozen(); - this._imageCount = value; + this._style = value; } } /// - /// The style of the generated images. Must be one of vivid or natural. - /// Vivid causes the model to lean towards generating hyper-real and dramatic images. - /// Natural causes the model to produce more natural, less hyper-real looking images. - /// This param is only supported for dall-e-3. + /// The format in which the generated images are returned. + /// Can be a or a string where: + /// + /// Url = "url" or "uri". + /// Base64 = "b64_json" or "bytes". + /// /// - public string? Style + public object? ResponseFormat { - get => this._style; - + get => this._responseFormat; set { this.ThrowIfFrozen(); - this._style = value; + this._responseFormat = value; } } /// - /// Height of the generated image. + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. /// - public int Height + public string? EndUserId { - get => this._height; - + get => this._endUserId; set { this.ThrowIfFrozen(); - this._height = value; + this._endUserId = value; } } @@ -118,8 +115,7 @@ public override PromptExecutionSettings Clone() { ModelId = this.ModelId, ExtensionData = this.ExtensionData is not null ? 
new Dictionary(this.ExtensionData) : null, - Width = this.Width, - Height = this.Height, + Size = this.Size }; } @@ -149,11 +145,11 @@ public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExe #region private ================================================================================ - private int _width; - private int _height; - private int? _imageCount; + private (int Width, int Height)? _size; private string? _quality; private string? _style; + private object? _responseFormat; + private string? _endUserId; #endregion } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs deleted file mode 100644 index a1abd49388cd..000000000000 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System; -using System.Collections.Generic; -using System.Diagnostics.CodeAnalysis; -using System.Net.Http; -using System.Threading; -using System.Threading.Tasks; -using Azure; -using Azure.AI.OpenAI; -using Azure.Core; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Logging.Abstractions; -using Microsoft.SemanticKernel.Services; -using Microsoft.SemanticKernel.TextToImage; - -namespace Microsoft.SemanticKernel.Connectors.OpenAI; - -/// -/// Azure OpenAI Image generation -/// -/// -[Experimental("SKEXP0010")] -public sealed class AzureOpenAITextToImageService : ITextToImageService -{ - private readonly OpenAIClient _client; - private readonly ILogger _logger; - private readonly string _deploymentName; - private readonly Dictionary _attributes = []; - - /// - public IReadOnlyDictionary Attributes => this._attributes; - - /// - /// Gets the key used to store the deployment name in the dictionary. 
- /// - public static string DeploymentNameKey => "DeploymentName"; - - /// - /// Create a new instance of Azure OpenAI image generation service - /// - /// Deployment name identifier - /// Azure OpenAI deployment URL - /// Azure OpenAI API key - /// Model identifier - /// Custom for HTTP requests. - /// The ILoggerFactory used to create a logger for logging. If null, no logging will be performed. - /// Azure OpenAI Endpoint ApiVersion - public AzureOpenAITextToImageService( - string deploymentName, - string endpoint, - string apiKey, - string? modelId, - HttpClient? httpClient = null, - ILoggerFactory? loggerFactory = null, - string? apiVersion = null) - { - Verify.NotNullOrWhiteSpace(apiKey); - Verify.NotNullOrWhiteSpace(deploymentName); - - this._deploymentName = deploymentName; - - if (modelId is not null) - { - this.AddAttribute(AIServiceExtensions.ModelIdKey, modelId); - } - this.AddAttribute(DeploymentNameKey, deploymentName); - - this._logger = loggerFactory?.CreateLogger(typeof(AzureOpenAITextToImageService)) ?? NullLogger.Instance; - - var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) ?? - throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - - this._client = new(new Uri(connectorEndpoint), - new AzureKeyCredential(apiKey), - GetClientOptions(httpClient, apiVersion)); - } - - /// - /// Create a new instance of Azure OpenAI image generation service - /// - /// Deployment name identifier - /// Azure OpenAI deployment URL - /// Token credentials, e.g. DefaultAzureCredential, ManagedIdentityCredential, EnvironmentCredential, etc. - /// Model identifier - /// Custom for HTTP requests. - /// The ILoggerFactory used to create a logger for logging. If null, no logging will be performed. 
- /// Azure OpenAI Endpoint ApiVersion - public AzureOpenAITextToImageService( - string deploymentName, - string endpoint, - TokenCredential credential, - string? modelId, - HttpClient? httpClient = null, - ILoggerFactory? loggerFactory = null, - string? apiVersion = null) - { - Verify.NotNull(credential); - Verify.NotNullOrWhiteSpace(deploymentName); - - this._deploymentName = deploymentName; - - if (modelId is not null) - { - this.AddAttribute(AIServiceExtensions.ModelIdKey, modelId); - } - this.AddAttribute(DeploymentNameKey, deploymentName); - - this._logger = loggerFactory?.CreateLogger(typeof(AzureOpenAITextToImageService)) ?? NullLogger.Instance; - - var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri; - if (connectorEndpoint is null) - { - throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - } - - this._client = new(new Uri(connectorEndpoint), - credential, - GetClientOptions(httpClient, apiVersion)); - } - - /// - /// Create a new instance of Azure OpenAI image generation service - /// - /// Deployment name identifier - /// to use for the service. - /// Model identifier - /// The ILoggerFactory used to create a logger for logging. If null, no logging will be performed. - public AzureOpenAITextToImageService( - string deploymentName, - OpenAIClient openAIClient, - string? modelId, - ILoggerFactory? loggerFactory = null) - { - Verify.NotNull(openAIClient); - Verify.NotNullOrWhiteSpace(deploymentName); - - this._deploymentName = deploymentName; - - if (modelId is not null) - { - this.AddAttribute(AIServiceExtensions.ModelIdKey, modelId); - } - this.AddAttribute(DeploymentNameKey, deploymentName); - - this._logger = loggerFactory?.CreateLogger(typeof(AzureOpenAITextToImageService)) ?? 
NullLogger.Instance; - - this._client = openAIClient; - } - - /// - public async Task GenerateImageAsync( - string description, - int width, - int height, - Kernel? kernel = null, - CancellationToken cancellationToken = default) - { - Verify.NotNull(description); - - var size = (width, height) switch - { - (1024, 1024) => ImageSize.Size1024x1024, - (1792, 1024) => ImageSize.Size1792x1024, - (1024, 1792) => ImageSize.Size1024x1792, - _ => throw new NotSupportedException("Dall-E 3 can only generate images of the following sizes 1024x1024, 1792x1024, or 1024x1792") - }; - - Response imageGenerations; - try - { - imageGenerations = await this._client.GetImageGenerationsAsync( - new ImageGenerationOptions - { - DeploymentName = this._deploymentName, - Prompt = description, - Size = size, - }, cancellationToken).ConfigureAwait(false); - } - catch (RequestFailedException e) - { - throw e.ToHttpOperationException(); - } - - if (!imageGenerations.HasValue) - { - throw new KernelException("The response does not contain an image result"); - } - - if (imageGenerations.Value.Data.Count == 0) - { - throw new KernelException("The response does not contain any image"); - } - - return imageGenerations.Value.Data[0].Url.AbsoluteUri; - } - - private static OpenAIClientOptions GetClientOptions(HttpClient? httpClient, string? apiVersion) => - ClientCore.GetOpenAIClientOptions(httpClient, apiVersion switch - { - // DALL-E 3 is supported in the latest API releases - _ => OpenAIClientOptions.ServiceVersion.V2024_02_15_Preview - }); - - internal void AddAttribute(string key, string? value) - { - if (!string.IsNullOrEmpty(value)) - { - this._attributes.Add(key, value); - } - } - - /// - public async Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, CancellationToken cancellationToken = default) - { - var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); - - Verify.NotNull(input); - - var size = (imageSettings.Width, imageSettings.Height) switch - { - (256, 256) => ImageSize.Size256x256, - (512, 512) => ImageSize.Size512x512, - (1024, 1024) => ImageSize.Size1024x1024, - (1792, 1024) => ImageSize.Size1792x1024, - (1024, 1792) => ImageSize.Size1024x1792, - _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}") - }; - - Response imageGenerations; - try - { - var options = new ImageGenerationOptions - { - DeploymentName = this._deploymentName, - ImageCount = imageSettings.ImageCount, - Prompt = input.Text, - Size = size, - }; - - if (imageSettings.Quality is not null) - { - options.Quality = imageSettings.Quality; - } - if (imageSettings.Style is not null) - { - options.Style = imageSettings.Style; - } - - imageGenerations = await this._client.GetImageGenerationsAsync(options, cancellationToken).ConfigureAwait(false); - } - catch (RequestFailedException e) - { - throw e.ToHttpOperationException(); - } - - if (!imageGenerations.HasValue) - { - throw new KernelException("The response does not contain an image result"); - } - - if (imageGenerations.Value.Data.Count == 0) - { - throw new KernelException("The response does not contain any image"); - } - - List images = []; - foreach (var image in imageGenerations.Value.Data) - { - if (image.Url is not null) - { - images.Add(new ImageContent(image.Url)); - } - else if (image.Base64Data is not null) - { - images.Add(new ImageContent($"data:;base64,{image.Base64Data}")); - } - else - { - throw new NotSupportedException("Image is neither an URL nor a base64 data"); - } - } - return images; - } -} diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs 
b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs deleted file mode 100644 index 6e2be2425abf..000000000000 --- a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System; -using System.Collections.Generic; -using System.Diagnostics.CodeAnalysis; -using System.Linq; -using System.Net.Http; -using System.Text.Json; -using System.Threading; -using System.Threading.Tasks; -using Microsoft.Extensions.Logging; -using Microsoft.SemanticKernel.Services; -using Microsoft.SemanticKernel.TextToImage; - -namespace Microsoft.SemanticKernel.Connectors.OpenAI; - -/// -/// OpenAI text to image service. -/// -[Experimental("SKEXP0010")] -public sealed class OpenAITextToImageService : ITextToImageService -{ - private readonly OpenAITextToImageClientCore _core; - - /// - /// OpenAI REST API endpoint - /// - private const string OpenAIEndpoint = "https://api.openai.com/v1/images/generations"; - - /// - /// Optional value for the OpenAI-Organization header. - /// - private readonly string? _organizationHeaderValue; - - /// - /// Value for the authorization header. - /// - private readonly string _authorizationHeaderValue; - - /// - /// The model to use for image generation. - /// - private readonly string? _modelId; - - /// - /// Initializes a new instance of the class. - /// - /// OpenAI API key, see https://platform.openai.com/account/api-keys - /// OpenAI organization id. This is usually optional unless your account belongs to multiple organizations. - /// The model to use for image generation. - /// Custom for HTTP requests. - /// The to use for logging. If null, no logging will be performed. - public OpenAITextToImageService( - string apiKey, - string? organization = null, - string? modelId = null, - HttpClient? httpClient = null, - ILoggerFactory? 
loggerFactory = null) - { - Verify.NotNullOrWhiteSpace(apiKey); - this._authorizationHeaderValue = $"Bearer {apiKey}"; - this._organizationHeaderValue = organization; - this._modelId = modelId; - - this._core = new(httpClient, loggerFactory?.CreateLogger(this.GetType())); - this._core.AddAttribute(OpenAIClientCore.OrganizationKey, organization); - if (modelId is not null) - { - this._core.AddAttribute(AIServiceExtensions.ModelIdKey, modelId); - } - - this._core.RequestCreated += (_, request) => - { - request.Headers.Add("Authorization", this._authorizationHeaderValue); - if (!string.IsNullOrEmpty(this._organizationHeaderValue)) - { - request.Headers.Add("OpenAI-Organization", this._organizationHeaderValue); - } - }; - } - - /// - public IReadOnlyDictionary Attributes => this._core.Attributes; - - /// - public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) - { - Verify.NotNull(description); - if (width != height || (width != 256 && width != 512 && width != 1024)) - { - throw new ArgumentOutOfRangeException(nameof(width), width, "OpenAI can generate only square images of size 256x256, 512x512, or 1024x1024."); - } - - return this.GenerateImageAsync(this._modelId, description, width, height, "url", x => x.Url, cancellationToken); - } - - /// - public async Task> GetImageContentsAsync( - TextContent input, - PromptExecutionSettings? executionSettings = null, - Kernel? 
kernel = null, - CancellationToken cancellationToken = default) - { - // Ensure the input is valid - Verify.NotNull(input); - - // Convert the generic execution settings to OpenAI-specific settings - var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); - - // Determine the size of the image based on the width and height settings - var size = (imageSettings.Width, imageSettings.Height) switch - { - (256, 256) => "256x256", - (512, 512) => "512x512", - (1024, 1024) => "1024x1024", - (1792, 1024) => "1792x1024", - (1024, 1792) => "1024x1792", - _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}") - }; - - // Validate quality and style - var supportedQualities = new[] { "standard", "hd" }; - var supportedStyles = new[] { "vivid", "natural" }; - - if (!string.IsNullOrEmpty(imageSettings.Quality) && !supportedQualities.Contains(imageSettings.Quality)) - { - throw new NotSupportedException($"The provided quality '{imageSettings.Quality}' is not supported."); - } - - if (!string.IsNullOrEmpty(imageSettings.Style) && !supportedStyles.Contains(imageSettings.Style)) - { - throw new NotSupportedException($"The provided style '{imageSettings.Style}' is not supported."); - } - - // Create the request body for the image generation - var requestBody = JsonSerializer.Serialize(new TextToImageRequest - { - Model = imageSettings.ModelId ?? this._modelId, - Prompt = input.Text ?? string.Empty, - Size = size, - Count = imageSettings.ImageCount ?? 1, - Quality = imageSettings.Quality ?? "standard", - Style = imageSettings.Style ?? "vivid" - }); - - // Execute the request using the core client and return Image objects - var imageStrings = await this._core.ExecuteImageGenerationRequestAsync(OpenAIEndpoint, requestBody, x => x.Url ?? 
x.AsBase64, cancellationToken).ConfigureAwait(false); - - // Convert the strings to ImageContent objects - var images = new List(); - foreach (var imageString in imageStrings) - { - if (Uri.TryCreate(imageString, UriKind.Absolute, out var uriResult) && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps)) - { - images.Add(new ImageContent(uriResult)); - } - else - { - images.Add(new ImageContent($"data:;base64,{imageString}")); - } - } - - return images.AsReadOnly(); - } - - private async Task GenerateImageAsync( - string? model, - string description, - int width, int height, - string format, Func extractResponse, - CancellationToken cancellationToken) - { - Verify.NotNull(extractResponse); - - var requestBody = JsonSerializer.Serialize(new TextToImageRequest - { - Model = model, - Prompt = description, - Size = $"{width}x{height}", - Count = 1, - Format = format, - }); - - var list = await this._core.ExecuteImageGenerationRequestAsync(OpenAIEndpoint, requestBody, extractResponse!, cancellationToken).ConfigureAwait(false); - return list[0]; - } -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs index b8c3867ff358..a6bb608bc99f 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs @@ -57,4 +57,13 @@ public override string ToString() { return this.Text ?? string.Empty; } + + /// + /// When converting a string to a , the text is set to the string value. 
+ /// + /// + public static implicit operator TextContent(string text) + { + return new TextContent(text); + } } From b9e40b48d5c6bf694520fd9cabb7034bdde58425 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:31:29 +0100 Subject: [PATCH 06/20] Removed unnecessary parameter --- .../Settings/OpenAITextToImageExecutionSettings.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs index 39a61da72791..42ab42f77082 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs @@ -123,9 +123,8 @@ public override PromptExecutionSettings Clone() /// Create a new settings object with the values from another settings object. /// /// Template configuration - /// Default max tokens /// An instance of OpenAIPromptExecutionSettings - public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings, int? defaultMaxTokens = null) + public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? 
executionSettings) { if (executionSettings is null) { From 23e2f0354eec8bca8a558f10c056c72799b293b0 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:49:20 +0100 Subject: [PATCH 07/20] Fix Warnings --- .../TextToImage/OpenAI_TextToImageDalle3.cs | 2 +- .../Services/AzureOpenAITextToImageServiceTests.cs | 8 +++----- .../Services/OpenAITextToImageServiceTests.cs | 14 ++++++-------- .../CompatibilitySuppressions.xml | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs index bf09f9c7687a..e6ae32451170 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs @@ -92,7 +92,7 @@ public async Task SimpleTextToImageExampleAsync() var kernel = builder.Build(); var service = kernel.GetRequiredService(); - var generatedImages = await service.GetImageContentsAsync(new TextContent("A cute baby sea otter"), new OpenAITextToImageExecutionSettings { Width = 1024, Height = 1024 }); + var generatedImages = await service.GetImageContentsAsync(new TextContent("A cute baby sea otter"), new OpenAITextToImageExecutionSettings { Size = (Width: 1792, Height: 1024) }); this.Output.WriteLine(generatedImages[0].Uri!.ToString()); } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs index 302ba338697d..472f4544b112 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs @@ -2,7 +2,6 @@ using System; using System.IO; -using System.Net; using 
System.Net.Http; using System.Text; using System.Text.Json; @@ -11,7 +10,6 @@ using Azure.AI.OpenAI; using Azure.Core; using Microsoft.Extensions.Logging; -using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; @@ -156,14 +154,14 @@ public void ItShouldThrowExceptionIfNoEndpointProvided(bool useTokeCredential, s [InlineData("bytes", "b64_json")] [InlineData("b64_json", "b64_json")] [InlineData("GeneratedImage.Bytes", "b64_json")] - public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? uri, string? expectedResponseFormat) + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat) { // Arrange - object? responseFormatObject = uri switch + object? responseFormatObject = responseFormatOption switch { "GeneratedImage.Uri" => GeneratedImageFormat.Uri, "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, - _ => uri + _ => responseFormatOption }; this._httpClient.BaseAddress = new Uri("https://api-host"); diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs index 856366548cc5..a06fe29bcce1 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs @@ -3,16 +3,14 @@ using System; using System.IO; using System.Net.Http; +using System.Text; using System.Threading.Tasks; using Microsoft.Extensions.Logging; -using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; using Moq; -using Xunit; using OpenAI.Images; -using System.Text.Unicode; -using System.Text; +using Xunit; namespace 
SemanticKernel.Connectors.OpenAI.UnitTests.Services; @@ -42,7 +40,7 @@ public OpenAITextToImageServiceTests() public void ConstructorWorksCorrectly() { // Arrange & Act - var sut = new OpenAITextToImageService("apikey", "organization", "model"); + var sut = new OpenAITextToImageService("apiKey", "organization", "model"); // Assert Assert.NotNull(sut); @@ -80,14 +78,14 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string [InlineData("bytes", "b64_json")] [InlineData("b64_json", "b64_json")] [InlineData("GeneratedImage.Bytes", "b64_json")] - public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? uri, string? expectedResponseFormat) + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat) { // Arrange - object? responseFormatObject = uri switch + object? responseFormatObject = responseFormatOption switch { "GeneratedImage.Uri" => GeneratedImageFormat.Uri, "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, - _ => uri + _ => responseFormatOption }; var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); diff --git a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml index 178c983ba1d1..4e0f61b8bf4b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml +++ b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml @@ -15,4 +15,18 @@ lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll true + + CP0006 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0006 + 
M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + \ No newline at end of file From 3e0c9679bd45609da67b14cfc98662bc4a944730 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:01:00 +0100 Subject: [PATCH 08/20] Add missing xmldocs and small changes --- .../src/SemanticKernel.Abstractions/Contents/TextContent.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs index a6bb608bc99f..558ab739d279 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs @@ -59,9 +59,9 @@ public override string ToString() } /// - /// When converting a string to a , the text is set to the string value. + /// When converting a string to a , the content is automatically set to the string value. 
/// - /// + /// Text content public static implicit operator TextContent(string text) { return new TextContent(text); From 17c1492fdbe22dd2a2b60dba2866cdd1ec07784e Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:19:13 +0100 Subject: [PATCH 09/20] GenerateImage extension method UT working --- .../AzureOpenAITextToImageServiceTests.cs | 5 +- .../Core/AzureClientCore.TextToImage.cs | 18 ++++++++ .../Services/AzureOpenAITextToAudioService.cs | 8 +--- .../Services/AzureOpenAITextToImageService.cs | 19 +++----- .../Services/OpenAITextToImageServiceTests.cs | 3 ++ .../Core/ClientCore.TextToImage.cs | 8 ++-- ...AITextToImageExecutionSettingsConverter.cs | 27 +++++++++++ .../Services/OpenAITextToImageService.cs | 9 ---- .../OpenAITextToImageExecutionSettings.cs | 37 ++++++++++++++- .../AI/TextToImage/ITextToImageService.cs | 17 ------- .../TextToImageServiceExtensions.cs | 46 +++++++++++++++++++ 11 files changed, 148 insertions(+), 49 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs create mode 100644 dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs index 472f4544b112..215c6bacd10e 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs @@ -13,6 +13,7 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; +using 
Microsoft.SemanticKernel.TextToImage; using Moq; using OpenAI.Images; @@ -74,7 +75,9 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string var sut = new AzureOpenAITextToImageService("deployment", "https://api-host", "api-key", modelId, this._httpClient, loggerFactory: this._mockLoggerFactory.Object); // Act +#pragma warning disable CS0618 // Type or member is obsolete var result = await sut.GenerateImageAsync("description", width, height); +#pragma warning restore CS0618 // Type or member is obsolete // Assert Assert.Equal("https://image-url/", result); @@ -83,7 +86,7 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.NotNull(request); Assert.Equal("description", request["prompt"]?.ToString()); Assert.Equal("deployment", request["model"]?.ToString()); - Assert.Equal("url", request["response_format"]?.ToString()); + Assert.Null(request["response_format"]); Assert.Equal($"{width}x{height}", request["size"]?.ToString()); } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs new file mode 100644 index 000000000000..9ca3e7b1cf04 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.OpenAI; + +namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI; + +internal partial class AzureClientCore : ClientCore +{ + protected override string GetModelId(string? settingsModelId) + { + return settingsModelId ?? 
this.DeploymentName; + } +} diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs index 0863d156a5b4..54077a7aab1b 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs @@ -57,9 +57,7 @@ public AzureOpenAITextToAudioService( { var url = !string.IsNullOrWhiteSpace(httpClient?.BaseAddress?.AbsoluteUri) ? httpClient!.BaseAddress!.AbsoluteUri : endpoint; - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech var azureOpenAIClient = new AzureOpenAIClient(new Uri(url), apiKey, options); @@ -89,9 +87,7 @@ public AzureOpenAITextToAudioService( { var url = !string.IsNullOrWhiteSpace(httpClient?.BaseAddress?.AbsoluteUri) ? 
httpClient!.BaseAddress!.AbsoluteUri : endpoint; - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech var azureOpenAIClient = new AzureOpenAIClient(new Uri(url), credential, options); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs index b11840a2cf1f..0e7c9cfa7c76 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs @@ -46,12 +46,13 @@ public AzureOpenAITextToImageService( { Verify.NotNullOrWhiteSpace(apiKey); - var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) - ?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); + var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri; + if (connectorEndpoint is null) + { + throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. 
Please ensure at least one is provided."); + } - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation var azureOpenAIClient = new AzureOpenAIClient(new Uri(connectorEndpoint), apiKey, options); @@ -87,9 +88,7 @@ public AzureOpenAITextToImageService( var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) ?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation var azureOpenAIClient = new AzureOpenAIClient(new Uri(connectorEndpoint), credential, options); @@ -124,10 +123,6 @@ public AzureOpenAITextToImageService( } } - /// - public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) - => this._client.GenerateImageAsync(this._client.DeploymentName, description, width, height, cancellationToken); - /// public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? 
executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs index a06fe29bcce1..70b3e3dbadc0 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs @@ -8,6 +8,7 @@ using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextToImage; using Moq; using OpenAI.Images; using Xunit; @@ -64,7 +65,9 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.Equal(modelId, sut.Attributes["ModelId"]); // Act +#pragma warning disable CS0618 // Type or member is obsolete var result = await sut.GenerateImageAsync("description", width, height); +#pragma warning restore CS0618 // Type or member is obsolete // Assert Assert.Equal("https://image-url/", result); diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs index 84c893af68bf..4dbc417bcc20 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs @@ -79,9 +79,7 @@ internal async Task> GetImageContentsAsync( EndUserId = imageSettings.EndUserId, }; - var targetModel = string.IsNullOrEmpty(imageSettings.ModelId) - ? "dall-e-2" // Defaults to the DALL-E 2 server-side - https://platform.openai.com/docs/api-reference/images/create#images-create-model. 
- : imageSettings.ModelId; + var targetModel = this.GetModelId(imageSettings.ModelId); ClientResult response = await RunRequestAsync(() => this.Client!.GetImageClient(targetModel).GenerateImageAsync(input.Text, imageGenerationOptions, cancellationToken)).ConfigureAwait(false); var generatedImage = response.Value; @@ -99,6 +97,10 @@ internal async Task> GetImageContentsAsync( return result; } + protected virtual string GetModelId(string? settingsModelId) + // Defaults to the DALL-E 2 server-side - https://platform.openai.com/docs/api-reference/images/create#images-create-model. + => string.IsNullOrEmpty(settingsModelId) ? "dall-e-2" : settingsModelId!; + private static GeneratedImageSize? GetGeneratedImageSize((int Width, int Height)? size) => size is null ? null diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs new file mode 100644 index 000000000000..55a2a82537fe --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Microsoft.SemanticKernel.Connectors.OpenAI.Core; +/*internal class OpenAITextToImageExecutionSettingsConverter : JsonConverter +{ + public override OpenAITextToImageExecutionSettings Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + var settings = base.Read(ref reader, typeToConvert, options); + + // I need to read the width and height properties from the JSON object after the base.Read() call + + return settings; + } + + public override void Write(Utf8JsonWriter writer, OpenAITextToImageExecutionSettings value, JsonSerializerOptions options) + { + base.Write(writer, value, options); + } +}*/ diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs index 79e6a96a5761..89003fc2a553 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs @@ -46,13 +46,4 @@ public Task> GetImageContentsAsync( Kernel? kernel = null, CancellationToken cancellationToken = default) => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); - - /// - public Task GenerateImageAsync( - string description, - int width, - int height, - Kernel? 
kernel = null, - CancellationToken cancellationToken = default) - => this._client.GenerateImageAsync(this._client.ModelId, description, width, height, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs index 42ab42f77082..3448abd96036 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs @@ -20,6 +20,7 @@ public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings public OpenAITextToImageExecutionSettings() { } + /// /// Optional width and height of the generated image. /// @@ -38,6 +39,7 @@ public OpenAITextToImageExecutionSettings() /// The quality of the image that will be generated. Defaults to "standard" /// "hd" or "high" creates images with finer details and greater consistency. This param is only supported for dall-e-3. /// + [JsonPropertyName("quality")] public string? Quality { get => this._quality; @@ -55,6 +57,7 @@ public string? Quality /// Natural causes the model to produce more natural, less hyper-real looking images. /// This param is only supported for dall-e-3. /// + [JsonPropertyName("style")] public string? Style { get => this._style; @@ -74,6 +77,7 @@ public string? Style /// Base64 = "b64_json" or "bytes". /// /// + [JsonPropertyName("response_format")] public object? ResponseFormat { get => this._responseFormat; @@ -87,6 +91,7 @@ public object? ResponseFormat /// /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. /// + [JsonPropertyName("user")] public string? 
EndUserId { get => this._endUserId; @@ -137,13 +142,43 @@ public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExe } var json = JsonSerializer.Serialize(executionSettings); + var openAIExecutionSettings = JsonSerializer.Deserialize(json, JsonOptionsCache.ReadPermissive)!; + if (openAIExecutionSettings.ExtensionData?.TryGetValue("width", out var width) ?? false) + { + openAIExecutionSettings.Width = ((JsonElement)width).GetInt32(); + } + if (openAIExecutionSettings.ExtensionData?.TryGetValue("height", out var height) ?? false) + { + openAIExecutionSettings.Height = ((JsonElement)height).GetInt32(); + } - var openAIExecutionSettings = JsonSerializer.Deserialize(json, JsonOptionsCache.ReadPermissive); return openAIExecutionSettings!; } #region private ================================================================================ + [JsonPropertyName("width")] + internal int? Width + { + get => this.Size?.Width; + set + { + if (!value.HasValue) { return; } + this.Size = (value.Value, this.Size?.Height ?? 0); + } + } + + [JsonPropertyName("height")] + internal int? Height + { + get => this.Size?.Height; + set + { + if (!value.HasValue) { return; } + this.Size = (this.Size?.Width ?? 0, value.Value); + } + } + private (int Width, int Height)? _size; private string? _quality; private string? 
_style; diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs index de10f02f6a36..6144be4baa5b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs @@ -14,23 +14,6 @@ namespace Microsoft.SemanticKernel.TextToImage; [Experimental("SKEXP0001")] public interface ITextToImageService : IAIService { - /// - /// Generate an image matching the given description - /// - /// Image generation prompt - /// Image width in pixels - /// Image height in pixels - /// The containing services, plugins, and other state for use throughout the operation. - /// The to monitor for cancellation requests. The default is . - /// Generated image in base64 format or image URL - [Experimental("SKEXP0001")] - public Task GenerateImageAsync( - string description, - int width, - int height, - Kernel? kernel = null, - CancellationToken cancellationToken = default); - /// /// Generate images matching the given text description /// diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs new file mode 100644 index 000000000000..cbcbbbbfcf53 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Threading.Tasks; +using System.Threading; +using System.Text.Json; +using System; + +namespace Microsoft.SemanticKernel.TextToImage; + +/// +/// Extension methods for . 
+/// +public static class TextToImageServiceExtensions +{ + /// + /// Generate an image matching the given description + /// + /// Target instance + /// Image generation prompt + /// Image width in pixels + /// Image height in pixels + /// The containing services, plugins, and other state for use throughout the operation. + /// The to monitor for cancellation requests. The default is . + /// Generated image in base64 format or image URL + [Obsolete("This method is obsolete. Use GetImageContentsAsync instead.")] + public static async Task GenerateImageAsync(this ITextToImageService service, + string description, + int width, + int height, + Kernel? kernel = null, + CancellationToken cancellationToken = default) + { + var imageJson = $$""" + { + "width": {{width}}, + "height": {{height}} + } + """; + + var executionSettings = JsonSerializer.Deserialize(imageJson); + + var result = await service.GetImageContentsAsync(new TextContent(description), executionSettings, kernel, cancellationToken).ConfigureAwait(false); + + return result[0].Uri!.ToString(); + } +} From 2141016595257f6b3ef63ac570e19abbd762feb8 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:30:46 +0100 Subject: [PATCH 10/20] Updating ITs --- .../AzureOpenAITextToImageTests.cs | 27 +++++++++++++++++++ .../OpenAI/OpenAITextToImageTests.cs | 23 ++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs index 4b2b65dd5417..fb417d990dd1 100644 --- a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs @@ -4,6 +4,7 @@ using Azure.Identity; using Microsoft.Extensions.Configuration; using Microsoft.SemanticKernel; +using 
Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.TextToImage; using SemanticKernel.IntegrationTests.TestSettings; using Xunit; @@ -42,4 +43,30 @@ public async Task ItCanReturnImageUrlAsync() Assert.NotNull(result); Assert.StartsWith("https://", result); } + + [Fact] + public async Task GetImageContentsCanReturnImageUrlAsync() + { + // Arrange + AzureOpenAIConfiguration? configuration = this._configuration.GetSection("AzureOpenAITextToImage").Get(); + Assert.NotNull(configuration); + + var kernel = Kernel.CreateBuilder() + .AddAzureOpenAITextToImage( + deploymentName: configuration.DeploymentName, + endpoint: configuration.Endpoint, + credentials: new AzureCliCredential()) + .Build(); + + var service = kernel.GetRequiredService(); + + // Act + var result = await service.GetImageContentsAsync("The sun rises in the east and sets in the west.", new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + + // Assert + Assert.NotNull(result); + Assert.NotEmpty(result); + Assert.NotEmpty(result[0].Uri!.ToString()); + Assert.StartsWith("https://", result[0].Uri); + } } diff --git a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs index 85512760dcd0..9a01f2b23db8 100644 --- a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs @@ -3,6 +3,7 @@ using System.Threading.Tasks; using Microsoft.Extensions.Configuration; using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.TextToImage; using SemanticKernel.IntegrationTests.TestSettings; using Xunit; @@ -60,4 +61,26 @@ public async Task OpenAITextToImageUseDallE2ByDefaultAsync() Assert.NotNull(result); Assert.NotEmpty(result); } + + [Fact] + public async Task OpenAITextToImageDalle3GetImagesTestAsync() + { + // Arrange + OpenAIConfiguration? 
openAIConfiguration = this._configuration.GetSection("OpenAITextToImage").Get(); + Assert.NotNull(openAIConfiguration); + + var kernel = Kernel.CreateBuilder() + .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey, modelId: "dall-e-3") + .Build(); + + var service = kernel.GetRequiredService(); + + // Act + var result = await service.GetImageContentsAsync("The sun rises in the east and sets in the west.", new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + + // Assert + Assert.NotNull(result); + Assert.NotEmpty(result); + Assert.NotEmpty(result[0].Uri!.ToString()); + } } From dc7f8efbe82afa78e9e6ed242d73cc247d8db16d Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:38:00 +0100 Subject: [PATCH 11/20] Address extra folder --- .../src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj b/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj index 028735b06890..0a94324fb7d5 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj +++ b/dotnet/src/Connectors/Connectors.OpenAI/Connectors.OpenAI.csproj @@ -39,8 +39,4 @@ - - - - From 373870e15b2ac1ccbb14ee3db59a532d9e3429c4 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Tue, 24 Sep 2024 07:46:21 +0100 Subject: [PATCH 12/20] Fix warnings --- .../Core/AzureClientCore.TextToImage.cs | 5 ----- .../OpenAITextToImageExecutionSettingsConverter.cs | 11 +++++++++-- .../AI/TextToImage/TextToImageServiceExtensions.cs | 6 +++--- .../CompatibilitySuppressions.xml | 14 ++++++++++++++ 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs index 9ca3e7b1cf04..a14123481373 100644 
--- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs @@ -1,10 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; using Microsoft.SemanticKernel.Connectors.OpenAI; namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI; diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs index 55a2a82537fe..311008d4ce2d 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs @@ -1,13 +1,20 @@ // Copyright (c) Microsoft. All rights reserved. +/* Unmerged change from project 'Connectors.OpenAI(netstandard2.0)' +Before: using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.Json.Serialization; -using System.Text.Json; -using System.Threading.Tasks; +After: +using System.Text.Json.Serialization; +*/ +/* Unmerged change from project 'Connectors.OpenAI(netstandard2.0)' +Removed: +using System.Threading.Tasks; +*/ namespace Microsoft.SemanticKernel.Connectors.OpenAI.Core; /*internal class OpenAITextToImageExecutionSettingsConverter : JsonConverter { diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs index cbcbbbbfcf53..ab1d1470b1df 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs @@ -1,9 +1,9 @@ // Copyright (c) Microsoft. 
All rights reserved. -using System.Threading.Tasks; -using System.Threading; -using System.Text.Json; using System; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; namespace Microsoft.SemanticKernel.TextToImage; diff --git a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml index 4e0f61b8bf4b..efe6b722680f 100644 --- a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml +++ b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml @@ -8,6 +8,13 @@ lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll true + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + CP0002 M:Microsoft.SemanticKernel.Agents.OpenAI.AnnotationContent.#ctor(System.String,System.Object,System.Collections.Generic.IReadOnlyDictionary{System.String,System.Object}) @@ -15,6 +22,13 @@ lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll true + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + CP0006 M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) From de0c4f231ff7a3606f863537936e83667ab66ab2 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:00:57 +0100 
Subject: [PATCH 13/20] Fix warnings --- .../AzureOpenAITextToImageServiceTests.cs | 4 ++-- .../AzureOpenAIPromptExecutionSettingsTests.cs | 5 +++-- .../CompatibilitySuppressions.xml | 18 ++++++++++++++++++ .../CompatibilitySuppressions.xml | 18 ++++++++++++++++++ .../AzureOpenAI/AzureOpenAITextToImageTests.cs | 4 +++- .../OpenAI/OpenAITextToImageTests.cs | 2 ++ 6 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml create mode 100644 dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs index 215c6bacd10e..c79575e80527 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs @@ -17,6 +17,8 @@ using Moq; using OpenAI.Images; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Services; /// @@ -75,9 +77,7 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string var sut = new AzureOpenAITextToImageService("deployment", "https://api-host", "api-key", modelId, this._httpClient, loggerFactory: this._mockLoggerFactory.Object); // Act -#pragma warning disable CS0618 // Type or member is obsolete var result = await sut.GenerateImageAsync("description", width, height); -#pragma warning restore CS0618 // Type or member is obsolete // Assert Assert.Equal("https://image-url/", result); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs 
b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs index 427815fc44cb..d8ff5b1e0d79 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs @@ -7,6 +7,8 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using Microsoft.SemanticKernel.Connectors.OpenAI; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Settings; /// @@ -242,9 +244,8 @@ public void FromExecutionSettingsWithDataDoesNotIncludeEmptyStopSequences() var executionSettings = new AzureOpenAIPromptExecutionSettings { StopSequences = [] }; // Act -#pragma warning disable CS0618 // AzureOpenAIChatCompletionWithData is deprecated in favor of OpenAIPromptExecutionSettings.AzureChatExtensionsOptions var executionSettingsWithData = AzureOpenAIPromptExecutionSettings.FromExecutionSettingsWithData(executionSettings); -#pragma warning restore CS0618 + // Assert Assert.Null(executionSettingsWithData.StopSequences); } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml b/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..86629bb200cf --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml @@ -0,0 +1,18 @@ + + + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.AzureOpenAI.AzureOpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + lib/net8.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + true + + + CP0002 + 
M:Microsoft.SemanticKernel.Connectors.AzureOpenAI.AzureOpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + true + + \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml b/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..c3b3af979029 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml @@ -0,0 +1,18 @@ + + + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + true + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + true + + \ No newline at end of file diff --git a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs index fb417d990dd1..ac3f6d020c55 100644 --- a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs @@ -9,6 +9,8 @@ using SemanticKernel.IntegrationTests.TestSettings; using Xunit; +#pragma warning disable CS0618 // Type or member is obsolete + namespace 
SemanticKernel.IntegrationTests.Connectors.AzureOpenAI; public sealed class AzureOpenAITextToImageTests @@ -67,6 +69,6 @@ public async Task GetImageContentsCanReturnImageUrlAsync() Assert.NotNull(result); Assert.NotEmpty(result); Assert.NotEmpty(result[0].Uri!.ToString()); - Assert.StartsWith("https://", result[0].Uri); + Assert.StartsWith("https://", result[0].Uri!.ToString()); } } diff --git a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs index 9a01f2b23db8..1c3f1b66ed69 100644 --- a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs @@ -8,6 +8,8 @@ using SemanticKernel.IntegrationTests.TestSettings; using Xunit; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.IntegrationTests.Connectors.OpenAI; public sealed class OpenAITextToImageTests { From a783563b1b7926154b3d621c7be220c4a81afd7c Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:24:45 +0100 Subject: [PATCH 14/20] Update Obsolete Concepts --- .../TextToImage/OpenAI_TextToImageDalle3.cs | 34 +++++++++---------- .../OpenAI/OpenAITextToImageTests.cs | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs index e6ae32451170..3a65659c9a9f 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs @@ -97,22 +97,18 @@ public async Task SimpleTextToImageExampleAsync() this.Output.WriteLine(generatedImages[0].Uri!.ToString()); } - [Fact(Skip = "Generating the Image can take too long and often break the test")] - public async Task AzureOpenAIDallEAsync() + [Fact] + public async Task 
OpenAIDallE3Async() { - Console.WriteLine("========Azure OpenAI DALL-E 3 Text To Image ========"); + Console.WriteLine("======== OpenAI DALL-E 3 Text To Image ========"); var builder = Kernel.CreateBuilder() - .AddAzureOpenAITextToImage( // Add your text to image service - deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName, - endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint, - apiKey: TestConfiguration.AzureOpenAI.ImageApiKey, - modelId: TestConfiguration.AzureOpenAI.ImageModelId, - apiVersion: "2024-02-15-preview") //DALL-E 3 is only supported in this version - .AddAzureOpenAIChatCompletion( // Add your chat completion service - deploymentName: TestConfiguration.AzureOpenAI.ChatDeploymentName, - endpoint: TestConfiguration.AzureOpenAI.Endpoint, - apiKey: TestConfiguration.AzureOpenAI.ApiKey); + .AddOpenAITextToImage( // Add your text to image service + modelId: "dall-e-3", + apiKey: TestConfiguration.OpenAI.ApiKey) //DALL-E 3 is only supported in this version + .AddOpenAIChatCompletion( // Add your chat completion service + modelId: TestConfiguration.OpenAI.ChatModelId, + apiKey: TestConfiguration.OpenAI.ApiKey); builder.Services.ConfigureHttpClientDefaults(c => { @@ -120,7 +116,7 @@ public async Task AzureOpenAIDallEAsync() c.AddStandardResilienceHandler().Configure(o => { o.Retry.MaxRetryAttempts = 5; - o.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(60); + o.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(120); }); }); @@ -128,10 +124,10 @@ public async Task AzureOpenAIDallEAsync() ITextToImageService dallE = kernel.GetRequiredService(); var imageDescription = "A cute baby sea otter"; - var image = await dallE.GenerateImageAsync(imageDescription, 1024, 1024); + var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); Console.WriteLine(imageDescription); - Console.WriteLine("Image URL: " + image); + Console.WriteLine("Image URL: " + images[0].Uri!.ToString()); /* 
Output: @@ -155,7 +151,8 @@ A cute baby sea otter var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.Add(reply); - image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + var image = images[0].Uri!.ToString(); Console.WriteLine("Bot: " + image); Console.WriteLine("Img description: " + reply); @@ -165,7 +162,8 @@ A cute baby sea otter reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.Add(reply); - image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + image = images[0].Uri!.ToString(); Console.WriteLine("Bot: " + image); Console.WriteLine("Img description: " + reply); diff --git a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs index 1c3f1b66ed69..aff58810ce42 100644 --- a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs @@ -83,6 +83,6 @@ public async Task OpenAITextToImageDalle3GetImagesTestAsync() // Assert Assert.NotNull(result); Assert.NotEmpty(result); - Assert.NotEmpty(result[0].Uri!.ToString()); + Assert.NotEmpty(result[0].Uri!.ToString()); } } From 34da6a7cef66a81531e53a0069e318fb64a2fab6 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Tue, 24 Sep 2024 10:38:09 +0100 Subject: [PATCH 15/20] Fix warning --- .../TextToImage/OpenAI_TextToImageDalle3.cs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs index 
3a65659c9a9f..39331e2ad846 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs @@ -13,7 +13,7 @@ namespace TextToImage; public class OpenAI_TextToImageDalle3(ITestOutputHelper output) : BaseTest(output) { [Fact] - public async Task OpenAIDallEAsync() + public async Task OpenAIDallE2Async() { Console.WriteLine("======== OpenAI DALL-E 2 Text To Image ========"); @@ -25,8 +25,8 @@ public async Task OpenAIDallEAsync() ITextToImageService dallE = kernel.GetRequiredService(); var imageDescription = "A cute baby sea otter"; - var image = await dallE.GenerateImageAsync(imageDescription, 256, 256); - + var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + var image = images[0].Uri!.ToString(); Console.WriteLine(imageDescription); Console.WriteLine("Image URL: " + image); @@ -52,7 +52,8 @@ A cute baby sea otter var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.Add(reply); - image = await dallE.GenerateImageAsync(reply.Content!, 256, 256); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + image = images[0].Uri!.ToString(); Console.WriteLine("Bot: " + image); Console.WriteLine("Img description: " + reply); @@ -62,7 +63,8 @@ A cute baby sea otter reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.Add(reply); - image = await dallE.GenerateImageAsync(reply.Content!, 256, 256); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + image = images[0].Uri!.ToString(); Console.WriteLine("Bot: " + image); Console.WriteLine("Img description: " + reply); @@ -127,12 +129,12 @@ public async Task OpenAIDallE3Async() var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size 
= (1024, 1024) }); Console.WriteLine(imageDescription); - Console.WriteLine("Image URL: " + images[0].Uri!.ToString()); + Console.WriteLine("Image URL: " + images[0].Uri!); /* Output: A cute baby sea otter - Image URL: https://dalleproduse.blob.core.windows.net/private/images/.... + Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-/.... */ From f0c95cd1aa923cd06345e22e367d603e2725f162 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:59:04 +0100 Subject: [PATCH 16/20] Adding suppressions were needed --- .../CompatibilitySuppressions.xml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml diff --git a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..f0c61ea95587 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml @@ -0,0 +1,32 @@ + + + + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0006 + 
M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0006 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + + \ No newline at end of file From 31b6d8c03f433e0b8755b3ae66d7c80a4df2e64e Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:27:37 +0100 Subject: [PATCH 17/20] Address PR comments --- .../Core/AzureClientCore.TextToImage.cs | 13 ------ .../Core/AzureClientCore.cs | 4 ++ .../Services/AzureOpenAITextToImageService.cs | 2 +- .../Core/ClientCore.TextToImage.cs | 8 +--- .../Connectors.OpenAI/Core/ClientCore.cs | 6 +++ ...AITextToImageExecutionSettingsConverter.cs | 34 -------------- .../OpenAITextToImageExecutionSettings.cs | 46 ++++++++++++------- .../AI/TextToImage/ITextToImageService.cs | 2 +- .../TextToImageServiceExtensions.cs | 4 +- 9 files changed, 44 insertions(+), 75 deletions(-) delete mode 100644 dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs delete mode 100644 dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs deleted file mode 100644 index a14123481373..000000000000 --- 
a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.TextToImage.cs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Microsoft.SemanticKernel.Connectors.OpenAI; - -namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI; - -internal partial class AzureClientCore : ClientCore -{ - protected override string GetModelId(string? settingsModelId) - { - return settingsModelId ?? this.DeploymentName; - } -} diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs index 3634d934b4a2..598ed85726e6 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs @@ -138,4 +138,8 @@ internal static AzureOpenAIClientOptions GetAzureOpenAIClientOptions(HttpClient? return options; } + + /// + protected override string GetClientModelId() + => this.DeploymentName; } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs index 0e7c9cfa7c76..30bb2616ac4d 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs @@ -125,5 +125,5 @@ public AzureOpenAITextToImageService( /// public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, CancellationToken cancellationToken = default) - => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); + => this._client.GetImageContentsAsync(this._client.DeploymentName, input, executionSettings, kernel, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs index 4dbc417bcc20..7d09f0805bb1 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs @@ -53,12 +53,14 @@ internal async Task GenerateImageAsync( /// /// Generates an image with the provided configuration. /// + /// Model identifier /// The input text content to generate the image /// Execution settings for the image generation /// Kernel instance /// Cancellation token /// List of image generated contents internal async Task> GetImageContentsAsync( + string targetModel, TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, @@ -79,8 +81,6 @@ internal async Task> GetImageContentsAsync( EndUserId = imageSettings.EndUserId, }; - var targetModel = this.GetModelId(imageSettings.ModelId); - ClientResult response = await RunRequestAsync(() => this.Client!.GetImageClient(targetModel).GenerateImageAsync(input.Text, imageGenerationOptions, cancellationToken)).ConfigureAwait(false); var generatedImage = response.Value; @@ -97,10 +97,6 @@ internal async Task> GetImageContentsAsync( return result; } - protected virtual string GetModelId(string? settingsModelId) - // Defaults to the DALL-E 2 server-side - https://platform.openai.com/docs/api-reference/images/create#images-create-model. - => string.IsNullOrEmpty(settingsModelId) ? "dall-e-2" : settingsModelId!; - private static GeneratedImageSize? GetGeneratedImageSize((int Width, int Height)? size) => size is null ? 
null diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs index 3b882e58679e..55bc3d15aad1 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs @@ -211,6 +211,12 @@ private static OpenAIClientOptions GetOpenAIClientOptions(HttpClient? httpClient return options; } + /// + /// Gets the model identifier to use for the client. + /// + protected virtual string GetClientModelId() + => this.ModelId; + /// /// Invokes the specified request and handles exceptions. /// diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs deleted file mode 100644 index 311008d4ce2d..000000000000 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAITextToImageExecutionSettingsConverter.cs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
- -/* Unmerged change from project 'Connectors.OpenAI(netstandard2.0)' -Before: -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Text.Json.Serialization; -After: -using System.Text.Json.Serialization; -*/ - -/* Unmerged change from project 'Connectors.OpenAI(netstandard2.0)' -Removed: -using System.Threading.Tasks; -*/ -namespace Microsoft.SemanticKernel.Connectors.OpenAI.Core; -/*internal class OpenAITextToImageExecutionSettingsConverter : JsonConverter -{ - public override OpenAITextToImageExecutionSettings Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) - { - var settings = base.Read(ref reader, typeToConvert, options); - - // I need to read the width and height properties from the JSON object after the base.Read() call - - return settings; - } - - public override void Write(Utf8JsonWriter writer, OpenAITextToImageExecutionSettings value, JsonSerializerOptions options) - { - base.Write(writer, value, options); - } -}*/ diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs index 3448abd96036..13e8a6b74b1f 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs @@ -14,16 +14,15 @@ namespace Microsoft.SemanticKernel.Connectors.OpenAI; [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings { - /// - /// Initializes a new instance of the class. - /// - public OpenAITextToImageExecutionSettings() - { - } - /// /// Optional width and height of the generated image. /// + /// + /// + /// Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2 model. + /// Must be one of 1024x1024, 1792x1024, 1024x1792 for dall-e-3 model. 
+ /// + /// public (int Width, int Height)? Size { get => this._size; @@ -36,9 +35,16 @@ public OpenAITextToImageExecutionSettings() } /// - /// The quality of the image that will be generated. Defaults to "standard" - /// "hd" or "high" creates images with finer details and greater consistency. This param is only supported for dall-e-3. + /// The quality of the image that will be generated. /// + /// + /// Must be one of standard or hd or high. + /// + /// standard: creates images with standard quality. This is the default. + /// hd OR high: creates images with finer details and greater consistency. + /// + /// This param is only supported for dall-e-3 model. + /// [JsonPropertyName("quality")] public string? Quality { @@ -52,11 +58,16 @@ public string? Quality } /// - /// The style of the generated images. Must be one of vivid or natural. - /// Vivid causes the model to lean towards generating hyper-real and dramatic images. - /// Natural causes the model to produce more natural, less hyper-real looking images. - /// This param is only supported for dall-e-3. + /// The style of the generated images. /// + /// + /// Must be one of vivid or natural. + /// + /// vivid: causes the model to lean towards generating hyper-real and dramatic images. + /// natural: causes the model to produce more natural, less hyper-real looking images. + /// + /// This param is only supported for dall-e-3 model. + /// [JsonPropertyName("style")] public string? Style { @@ -70,11 +81,12 @@ public string? Style } /// - /// The format in which the generated images are returned. - /// Can be a or a string where: + /// The format of the generated images. + /// Can be a or a string where: /// - /// Url = "url" or "uri". - /// Base64 = "b64_json" or "bytes". + /// : causes the model to generated in the provided format + /// url OR uri: causes the model to return an url for the generated images. + /// b64_json or bytes: causes the model to return in a Base64 format the content of the images. 
/// /// [JsonPropertyName("response_format")] diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs index 6144be4baa5b..3eb2d890aa54 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs @@ -15,7 +15,7 @@ namespace Microsoft.SemanticKernel.TextToImage; public interface ITextToImageService : IAIService { /// - /// Generate images matching the given text description + /// Given a prompt and/or an input text, the model will generate a new image. /// /// Input text for image generation /// Text to image execution settings diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs index ab1d1470b1df..26945f32c4a4 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System; using System.Text.Json; using System.Threading; using System.Threading.Tasks; @@ -13,7 +12,7 @@ namespace Microsoft.SemanticKernel.TextToImage; public static class TextToImageServiceExtensions { /// - /// Generate an image matching the given description + /// Given a prompt and/or an input text, the model will generate a new image. /// /// Target instance /// Image generation prompt @@ -22,7 +21,6 @@ public static class TextToImageServiceExtensions /// The containing services, plugins, and other state for use throughout the operation. /// The to monitor for cancellation requests. The default is . /// Generated image in base64 format or image URL - [Obsolete("This method is obsolete. 
Use GetImageContentsAsync instead.")] public static async Task GenerateImageAsync(this ITextToImageService service, string description, int width, From 3ea6fabeca311bf689bd131a923b936bb08a9ea4 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:34:34 +0100 Subject: [PATCH 18/20] Address UT and IT --- .../Services/OpenAITextToImageServiceTests.cs | 2 -- .../Connectors.OpenAI/Services/OpenAITextToImageService.cs | 4 ++-- .../Connectors/OpenAI/OpenAITextToImageTests.cs | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs index 70b3e3dbadc0..0d91f1e14588 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs @@ -65,9 +65,7 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.Equal(modelId, sut.Attributes["ModelId"]); // Act -#pragma warning disable CS0618 // Type or member is obsolete var result = await sut.GenerateImageAsync("description", width, height); -#pragma warning restore CS0618 // Type or member is obsolete // Assert Assert.Equal("https://image-url/", result); diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs index 89003fc2a553..4967d87228ff 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs @@ -36,7 +36,7 @@ public OpenAITextToImageService( HttpClient? httpClient = null, ILoggerFactory? 
loggerFactory = null) { - this._client = new(modelId, apiKey, organization, null, httpClient, loggerFactory?.CreateLogger(this.GetType())); + this._client = new(modelId ?? "dall-e-2", apiKey, organization, null, httpClient, loggerFactory?.CreateLogger(this.GetType())); } /// @@ -45,5 +45,5 @@ public Task> GetImageContentsAsync( PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) - => this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken); + => this._client.GetImageContentsAsync(this._client.ModelId, input, executionSettings, kernel, cancellationToken); } diff --git a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs index aff58810ce42..07524b592973 100644 --- a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs @@ -51,7 +51,7 @@ public async Task OpenAITextToImageUseDallE2ByDefaultAsync() Assert.NotNull(openAIConfiguration); var kernel = Kernel.CreateBuilder() - .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey, modelId: null) + .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey) .Build(); var service = kernel.GetRequiredService(); From af81e60e0da0fa96c476103c241b85ad17c776c9 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:51:28 +0100 Subject: [PATCH 19/20] Adding extension method usage concepts --- ...ToImageDalle3.cs => OpenAI_TextToImage.cs} | 2 +- .../TextToImage/OpenAI_TextToImageLegacy.cs | 165 ++++++++++++++++++ 2 files changed, 166 insertions(+), 1 deletion(-) rename dotnet/samples/Concepts/TextToImage/{OpenAI_TextToImageDalle3.cs => OpenAI_TextToImage.cs} (99%) create mode 100644 dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs diff --git 
a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs similarity index 99% rename from dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs rename to dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs index 39331e2ad846..96dbb53edb81 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs @@ -10,7 +10,7 @@ namespace TextToImage; // The following example shows how to use Semantic Kernel with OpenAI DALL-E 2 to create images -public class OpenAI_TextToImageDalle3(ITestOutputHelper output) : BaseTest(output) +public class OpenAI_TextToImage(ITestOutputHelper output) : BaseTest(output) { [Fact] public async Task OpenAIDallE2Async() diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs new file mode 100644 index 000000000000..fe6f68ade45b --- /dev/null +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs @@ -0,0 +1,165 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Http.Resilience; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.TextToImage; + +namespace TextToImage; + +/// +/// The following example shows how you can still use the previous "ITextToImageService.GenerateImageAsync" API to generate images. 
+/// +public class OpenAI_TextToImageLegacy(ITestOutputHelper output) : BaseTest(output) +{ + [Fact] + public async Task OpenAIDallEAsync() + { + Console.WriteLine("======== OpenAI DALL-E 2 Text To Image ========"); + + Kernel kernel = Kernel.CreateBuilder() + .AddOpenAITextToImage(TestConfiguration.OpenAI.ApiKey) // Add your text to image service + .AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey) // Add your chat completion service + .Build(); + + ITextToImageService dallE = kernel.GetRequiredService(); + + var imageDescription = "A cute baby sea otter"; + var image = await dallE.GenerateImageAsync(imageDescription, 256, 256); + + Console.WriteLine(imageDescription); + Console.WriteLine("Image URL: " + image); + + /* Output: + + A cute baby sea otter + Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/.... + + */ + + Console.WriteLine("======== Chat with images ========"); + + var chatGPT = kernel.GetRequiredService(); + var chatHistory = new ChatHistory( + "You're chatting with a user. Instead of replying directly to the user" + + " provide the description of an image that expresses what you want to say." + + " The user won't see your message, they will see only the image. The system " + + " generates an image using your description, so it's important you describe the image with details."); + + var msg = "Hi, I'm from Tokyo, where are you from?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + image = await dallE.GenerateImageAsync(reply.Content!, 256, 256); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + msg = "Oh, wow. 
Not sure where that is, could you provide more details?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + image = await dallE.GenerateImageAsync(reply.Content!, 256, 256); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + /* Output: + + User: Hi, I'm from Tokyo, where are you from? + Bot: https://oaidalleapiprodscus.blob.core.windows.net/private/... + Img description: [An image of a globe with a pin dropped on a location in the middle of the ocean] + + User: Oh, wow. Not sure where that is, could you provide more details? + Bot: https://oaidalleapiprodscus.blob.core.windows.net/private/... + Img description: [An image of a map zooming in on the pin location, revealing a small island with a palm tree on it] + + */ + } + + [Fact(Skip = "Generating the Image can take too long and often break the test")] + public async Task AzureOpenAIDallEAsync() + { + Console.WriteLine("========Azure OpenAI DALL-E 3 Text To Image ========"); + + var builder = Kernel.CreateBuilder() + .AddAzureOpenAITextToImage( // Add your text to image service + deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName, + endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint, + apiKey: TestConfiguration.AzureOpenAI.ImageApiKey, + modelId: TestConfiguration.AzureOpenAI.ImageModelId, + apiVersion: "2024-02-15-preview") //DALL-E 3 is only supported in this version + .AddAzureOpenAIChatCompletion( // Add your chat completion service + deploymentName: TestConfiguration.AzureOpenAI.ChatDeploymentName, + endpoint: TestConfiguration.AzureOpenAI.Endpoint, + apiKey: TestConfiguration.AzureOpenAI.ApiKey); + + builder.Services.ConfigureHttpClientDefaults(c => + { + // Use a standard resiliency policy, augmented to retry 5 times + c.AddStandardResilienceHandler().Configure(o => + { + o.Retry.MaxRetryAttempts = 5; + o.TotalRequestTimeout.Timeout = 
TimeSpan.FromSeconds(60); + }); + }); + + var kernel = builder.Build(); + + ITextToImageService dallE = kernel.GetRequiredService(); + var imageDescription = "A cute baby sea otter"; + var image = await dallE.GenerateImageAsync(imageDescription, 1024, 1024); + + Console.WriteLine(imageDescription); + Console.WriteLine("Image URL: " + image); + + /* Output: + + A cute baby sea otter + Image URL: https://dalleproduse.blob.core.windows.net/private/images/.... + + */ + + Console.WriteLine("======== Chat with images ========"); + + var chatGPT = kernel.GetRequiredService(); + var chatHistory = new ChatHistory( + "You're chatting with a user. Instead of replying directly to the user" + + " provide the description of an image that expresses what you want to say." + + " The user won't see your message, they will see only the image. The system " + + " generates an image using your description, so it's important you describe the image with details."); + + var msg = "Hi, I'm from Tokyo, where are you from?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + msg = "Oh, wow. Not sure where that is, could you provide more details?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + /* Output: + + User: Hi, I'm from Tokyo, where are you from? + Bot: https://dalleproduse.blob.core.windows.net/private/images/...... + Img description: [An image of a globe with a pin dropped on a location in the middle of the ocean] + + User: Oh, wow. 
Not sure where that is, could you provide more details? + Bot: https://dalleproduse.blob.core.windows.net/private/images/...... + Img description: [An image of a map zooming in on the pin location, revealing a small island with a palm tree on it] + + */ + } +} From a459d5470a5077fd0bf2a1e107c66e606adf438b Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:52:50 +0100 Subject: [PATCH 20/20] Adding missing MS header --- .../samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs index fe6f68ade45b..c2f208dd8334 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs @@ -1,4 +1,6 @@ -using Microsoft.Extensions.DependencyInjection; +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Http.Resilience; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion;