diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs new file mode 100644 index 000000000000..96dbb53edb81 --- /dev/null +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImage.cs @@ -0,0 +1,184 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Http.Resilience; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.SemanticKernel.TextToImage; + +namespace TextToImage; + +// The following example shows how to use Semantic Kernel with OpenAI DALL-E 2 to create images +public class OpenAI_TextToImage(ITestOutputHelper output) : BaseTest(output) +{ + [Fact] + public async Task OpenAIDallE2Async() + { + Console.WriteLine("======== OpenAI DALL-E 2 Text To Image ========"); + + Kernel kernel = Kernel.CreateBuilder() + .AddOpenAITextToImage(TestConfiguration.OpenAI.ApiKey) // Add your text to image service + .AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey) // Add your chat completion service + .Build(); + + ITextToImageService dallE = kernel.GetRequiredService(); + + var imageDescription = "A cute baby sea otter"; + var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + var image = images[0].Uri!.ToString(); + Console.WriteLine(imageDescription); + Console.WriteLine("Image URL: " + image); + + /* Output: + + A cute baby sea otter + Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/.... + + */ + + Console.WriteLine("======== Chat with images ========"); + + var chatGPT = kernel.GetRequiredService(); + var chatHistory = new ChatHistory( + "You're chatting with a user. 
Instead of replying directly to the user" + + " provide the description of an image that expresses what you want to say." + + " The user won't see your message, they will see only the image. The system " + + " generates an image using your description, so it's important you describe the image with details."); + + var msg = "Hi, I'm from Tokyo, where are you from?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + image = images[0].Uri!.ToString(); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + msg = "Oh, wow. Not sure where that is, could you provide more details?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) }); + image = images[0].Uri!.ToString(); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + /* Output: + + User: Hi, I'm from Tokyo, where are you from? + Bot: https://oaidalleapiprodscus.blob.core.windows.net/private/... + Img description: [An image of a globe with a pin dropped on a location in the middle of the ocean] + + User: Oh, wow. Not sure where that is, could you provide more details? + Bot: https://oaidalleapiprodscus.blob.core.windows.net/private/... 
+ Img description: [An image of a map zooming in on the pin location, revealing a small island with a palm tree on it] + + */ + } + + [Fact] + public async Task SimpleTextToImageExampleAsync() + { + var builder = Kernel.CreateBuilder() + .AddAzureOpenAITextToImage( // Add your text to image service + deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName, + endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint, + apiKey: TestConfiguration.AzureOpenAI.ImageApiKey, + modelId: TestConfiguration.AzureOpenAI.ImageModelId); + + var kernel = builder.Build(); + var service = kernel.GetRequiredService(); + + var generatedImages = await service.GetImageContentsAsync(new TextContent("A cute baby sea otter"), new OpenAITextToImageExecutionSettings { Size = (Width: 1792, Height: 1024) }); + + this.Output.WriteLine(generatedImages[0].Uri!.ToString()); + } + + [Fact] + public async Task OpenAIDallE3Async() + { + Console.WriteLine("======== OpenAI DALL-E 3 Text To Image ========"); + + var builder = Kernel.CreateBuilder() + .AddOpenAITextToImage( // Add your text to image service + modelId: "dall-e-3", + apiKey: TestConfiguration.OpenAI.ApiKey) //DALL-E 3 is only supported in this version + .AddOpenAIChatCompletion( // Add your chat completion service + modelId: TestConfiguration.OpenAI.ChatModelId, + apiKey: TestConfiguration.OpenAI.ApiKey); + + builder.Services.ConfigureHttpClientDefaults(c => + { + // Use a standard resiliency policy, augmented to retry 5 times + c.AddStandardResilienceHandler().Configure(o => + { + o.Retry.MaxRetryAttempts = 5; + o.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(120); + }); + }); + + var kernel = builder.Build(); + + ITextToImageService dallE = kernel.GetRequiredService(); + var imageDescription = "A cute baby sea otter"; + var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + + Console.WriteLine(imageDescription); + Console.WriteLine("Image URL: " + 
images[0].Uri!); + + /* Output: + + A cute baby sea otter + Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-/.... + + */ + + Console.WriteLine("======== Chat with images ========"); + + var chatGPT = kernel.GetRequiredService(); + var chatHistory = new ChatHistory( + "You're chatting with a user. Instead of replying directly to the user" + + " provide the description of an image that expresses what you want to say." + + " The user won't see your message, they will see only the image. The system " + + " generates an image using your description, so it's important you describe the image with details."); + + var msg = "Hi, I'm from Tokyo, where are you from?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + var image = images[0].Uri!.ToString(); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + msg = "Oh, wow. Not sure where that is, could you provide more details?"; + chatHistory.AddUserMessage(msg); + Console.WriteLine("User: " + msg); + + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + image = images[0].Uri!.ToString(); + Console.WriteLine("Bot: " + image); + Console.WriteLine("Img description: " + reply); + + /* Output: + + User: Hi, I'm from Tokyo, where are you from? + Bot: https://dalleproduse.blob.core.windows.net/private/images/...... + Img description: [An image of a globe with a pin dropped on a location in the middle of the ocean] + + User: Oh, wow. Not sure where that is, could you provide more details? + Bot: https://dalleproduse.blob.core.windows.net/private/images/...... 
+ Img description: [An image of a map zooming in on the pin location, revealing a small island with a palm tree on it] + + */ + } +} diff --git a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs similarity index 96% rename from dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs rename to dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs index 32e78c9382a8..c2f208dd8334 100644 --- a/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs +++ b/dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageLegacy.cs @@ -8,8 +8,10 @@ namespace TextToImage; -// The following example shows how to use Semantic Kernel with OpenAI DALL-E 2 to create images -public class OpenAI_TextToImageDalle3(ITestOutputHelper output) : BaseTest(output) +/// +/// The following example shows how you can still use the previous "ITextToImageService.GenerateImageAsync" API to generate images. +/// +public class OpenAI_TextToImageLegacy(ITestOutputHelper output) : BaseTest(output) { [Fact] public async Task OpenAIDallEAsync() diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs index 60aed7875b56..c79575e80527 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAITextToImageServiceTests.cs @@ -3,6 +3,7 @@ using System; using System.IO; using System.Net.Http; +using System.Text; using System.Text.Json; using System.Text.Json.Nodes; using System.Threading.Tasks; @@ -10,8 +11,13 @@ using Azure.Core; using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.Connectors.AzureOpenAI; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; +using 
Microsoft.SemanticKernel.TextToImage; using Moq; +using OpenAI.Images; + +#pragma warning disable CS0618 // Type or member is obsolete namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Services; @@ -30,7 +36,7 @@ public AzureOpenAITextToImageServiceTests() { ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) { - Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt")) + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json")) } }; this._httpClient = new HttpClient(this._messageHandlerStub, false); @@ -80,7 +86,7 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.NotNull(request); Assert.Equal("description", request["prompt"]?.ToString()); Assert.Equal("deployment", request["model"]?.ToString()); - Assert.Equal("url", request["response_format"]?.ToString()); + Assert.Null(request["response_format"]); Assert.Equal($"{width}x{height}", request["size"]?.ToString()); } @@ -143,6 +149,191 @@ public void ItShouldThrowExceptionIfNoEndpointProvided(bool useTokeCredential, s } } + [Theory] + [InlineData(null, null)] + [InlineData("uri", "url")] + [InlineData("url", "url")] + [InlineData("GeneratedImage.Uri", "url")] + [InlineData("bytes", "b64_json")] + [InlineData("b64_json", "b64_json")] + [InlineData("GeneratedImage.Bytes", "b64_json")] + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat) + { + // Arrange + object? 
responseFormatObject = responseFormatOption switch + { + "GeneratedImage.Uri" => GeneratedImageFormat.Uri, + "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, + _ => responseFormatOption + }; + + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormatObject }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedResponseFormat is not null) + { + Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", requestBody); + } + else + { + // Then no response format is provided, it should not be included in the request body + Assert.DoesNotContain("response_format", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("hd", "hd")] + [InlineData("high", "hd")] + [InlineData("standard", "standard")] + public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? 
expectedQuality) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Quality = quality }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedQuality is not null) + { + Assert.Contains($"\"quality\":\"{expectedQuality}\"", requestBody); + } + else + { + // Then no quality is provided, it should not be included in the request body + Assert.DoesNotContain("quality", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("vivid", "vivid")] + [InlineData("natural", "natural")] + public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Style = style }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedStyle is not null) + { + Assert.Contains($"\"style\":\"{expectedStyle}\"", requestBody); + } + else + { + // Then no style is provided, it should not be included in the request body + Assert.DoesNotContain("style", requestBody); + } + } + + [Theory] + [InlineData(null, null, null)] + [InlineData(1, 2, "1x2")] + public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? 
width, int? height, string? expectedSize) + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings + { + Size = width.HasValue && height.HasValue + ? (width.Value, height.Value) + : null + }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedSize is not null) + { + Assert.Contains($"\"size\":\"{expectedSize}\"", requestBody); + } + else + { + // Then no size is provided, it should not be included in the request body + Assert.DoesNotContain("size", requestBody); + } + } + + [Fact] + public async Task GetByteImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json")) + }; + + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.True(imageContent.CanRead); + Assert.Equal("image/png", imageContent.MimeType); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } 
+ + [Fact] + public async Task GetUrlImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._httpClient.BaseAddress = new Uri("https://api-host"); + var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock().Object, "dall-e-3", this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "url" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.False(imageContent.CanRead); + Assert.Equal(new Uri("https://image-url/"), imageContent.Uri); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + public void Dispose() { this._httpClient.Dispose(); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs index 427815fc44cb..d8ff5b1e0d79 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs @@ -7,6 +7,8 @@ using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using Microsoft.SemanticKernel.Connectors.OpenAI; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Settings; /// @@ -242,9 +244,8 @@ public void FromExecutionSettingsWithDataDoesNotIncludeEmptyStopSequences() var executionSettings = new AzureOpenAIPromptExecutionSettings { StopSequences = [] }; // Act -#pragma warning disable CS0618 // AzureOpenAIChatCompletionWithData is deprecated in favor of 
OpenAIPromptExecutionSettings.AzureChatExtensionsOptions var executionSettingsWithData = AzureOpenAIPromptExecutionSettings.FromExecutionSettingsWithData(executionSettings); -#pragma warning restore CS0618 + // Assert Assert.Null(executionSettingsWithData.StopSequences); } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json new file mode 100644 index 000000000000..e004607fa8f0 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json @@ -0,0 +1,9 @@ +{ + "created": 1726234481, + "data": [ + { + "b64_json": "iVBORw0KGgoAAA==", + "revised_prompt": "my prompt" + } + ] +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json similarity index 71% rename from dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt rename to dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json index 7d8f7327a5ec..8fd01a13c7ac 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.txt +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.json @@ -2,6 +2,7 @@ "created": 1702575371, "data": [ { + "revised_prompt": "my prompt", "url": "https://image-url/" } ] diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt deleted file mode 100644 index 1d6f2150b1d5..000000000000 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/TestData/text-to-image-response.txt +++ /dev/null @@ -1,9 +0,0 @@ -{ - "created": 1702575371, - 
"data": [ - { - "revised_prompt": "A photo capturing the diversity of the Earth's landscapes.", - "url": "https://image-url/" - } - ] -} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml b/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..86629bb200cf --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/CompatibilitySuppressions.xml @@ -0,0 +1,18 @@ + + + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.AzureOpenAI.AzureOpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + lib/net8.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + true + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.AzureOpenAI.AzureOpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.AzureOpenAI.dll + true + + \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs index 3634d934b4a2..598ed85726e6 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs @@ -138,4 +138,8 @@ internal static AzureOpenAIClientOptions GetAzureOpenAIClientOptions(HttpClient? 
return options; } + + /// + protected override string GetClientModelId() + => this.DeploymentName; } diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs index 0863d156a5b4..54077a7aab1b 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToAudioService.cs @@ -57,9 +57,7 @@ public AzureOpenAITextToAudioService( { var url = !string.IsNullOrWhiteSpace(httpClient?.BaseAddress?.AbsoluteUri) ? httpClient!.BaseAddress!.AbsoluteUri : endpoint; - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech var azureOpenAIClient = new AzureOpenAIClient(new Uri(url), apiKey, options); @@ -89,9 +87,7 @@ public AzureOpenAITextToAudioService( { var url = !string.IsNullOrWhiteSpace(httpClient?.BaseAddress?.AbsoluteUri) ? 
httpClient!.BaseAddress!.AbsoluteUri : endpoint; - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#text-to-speech var azureOpenAIClient = new AzureOpenAIClient(new Uri(url), credential, options); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs index b066cc4b3e66..30bb2616ac4d 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Services/AzureOpenAITextToImageService.cs @@ -52,9 +52,7 @@ public AzureOpenAITextToImageService( throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); } - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation var azureOpenAIClient = new AzureOpenAIClient(new Uri(connectorEndpoint), apiKey, options); @@ -87,15 +85,10 @@ public AzureOpenAITextToImageService( { Verify.NotNull(credential); - var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! 
: httpClient?.BaseAddress?.AbsoluteUri; - if (connectorEndpoint is null) - { - throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - } + var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri) + ?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided."); - var options = AzureClientCore.GetAzureOpenAIClientOptions( - httpClient, - AzureOpenAIClientOptions.ServiceVersion.V2024_05_01_Preview); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation + var options = AzureClientCore.GetAzureOpenAIClientOptions(httpClient); // DALL-E 3 is supported in the latest API releases - https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#image-generation var azureOpenAIClient = new AzureOpenAIClient(new Uri(connectorEndpoint), credential, options); @@ -131,6 +124,6 @@ public AzureOpenAITextToImageService( } /// - public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) - => this._client.GenerateImageAsync(this._client.DeploymentName, description, width, height, cancellationToken); + public Task> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, CancellationToken cancellationToken = default) + => this._client.GetImageContentsAsync(this._client.DeploymentName, input, executionSettings, kernel, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj index d80e3bd914de..fbf387f76a33 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj @@ -86,7 +86,10 @@ Always - + + Always + + Always diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs index 1528986b9064..0d91f1e14588 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAITextToImageServiceTests.cs @@ -3,11 +3,14 @@ using System; using System.IO; using System.Net.Http; +using System.Text; using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextToImage; using Moq; +using OpenAI.Images; using Xunit; namespace SemanticKernel.Connectors.OpenAI.UnitTests.Services; @@ -27,7 +30,7 @@ public OpenAITextToImageServiceTests() { ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) { - Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt")) + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json")) } }; this._httpClient = new HttpClient(this._messageHandlerStub, false); @@ -38,7 +41,7 @@ public OpenAITextToImageServiceTests() public void ConstructorWorksCorrectly() { // Arrange & Act - var sut = new 
OpenAITextToImageService("apikey", "organization", "model"); + var sut = new OpenAITextToImageService("apiKey", "organization", "model"); // Assert Assert.NotNull(sut); @@ -68,6 +71,185 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string Assert.Equal("https://image-url/", result); } + [Theory] + [InlineData(null, null)] + [InlineData("uri", "url")] + [InlineData("url", "url")] + [InlineData("GeneratedImage.Uri", "url")] + [InlineData("bytes", "b64_json")] + [InlineData("b64_json", "b64_json")] + [InlineData("GeneratedImage.Bytes", "b64_json")] + public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat) + { + // Arrange + object? responseFormatObject = responseFormatOption switch + { + "GeneratedImage.Uri" => GeneratedImageFormat.Uri, + "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes, + _ => responseFormatOption + }; + + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormatObject }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedResponseFormat is not null) + { + Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", requestBody); + } + else + { + // Then no response format is provided, it should not be included in the request body + Assert.DoesNotContain("response_format", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("hd", "hd")] + [InlineData("high", "hd")] + [InlineData("standard", "standard")] + public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? 
expectedQuality) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Quality = quality }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedQuality is not null) + { + Assert.Contains($"\"quality\":\"{expectedQuality}\"", requestBody); + } + else + { + // Then no quality is provided, it should not be included in the request body + Assert.DoesNotContain("quality", requestBody); + } + } + + [Theory] + [InlineData(null, null)] + [InlineData("vivid", "vivid")] + [InlineData("natural", "natural")] + public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { Style = style }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedStyle is not null) + { + Assert.Contains($"\"style\":\"{expectedStyle}\"", requestBody); + } + else + { + // Then no style is provided, it should not be included in the request body + Assert.DoesNotContain("style", requestBody); + } + } + + [Theory] + [InlineData(null, null, null)] + [InlineData(1, 2, "1x2")] + public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? width, int? height, string? 
expectedSize) + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings + { + Size = width.HasValue && height.HasValue + ? (width.Value, height.Value) + : null + }); + + // Assert + Assert.NotNull(result); + Assert.NotNull(this._messageHandlerStub.RequestContent); + + var requestBody = UTF8Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent); + if (expectedSize is not null) + { + Assert.Contains($"\"size\":\"{expectedSize}\"", requestBody); + } + else + { + // When no size is provided, it should not be included in the request body + Assert.DoesNotContain("size", requestBody); + } + } + + [Fact] + public async Task GetByteImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json")) + }; + + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.True(imageContent.CanRead); + Assert.Equal("image/png", imageContent.MimeType); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + + [Fact] + public async Task GetUrlImageContentsResponseWorksCorrectlyAsync() + { + // Arrange + var sut = new OpenAITextToImageService("api-key", httpClient: this._httpClient); + + // Act + var result = await sut.GetImageContentsAsync("my prompt", new 
OpenAITextToImageExecutionSettings { ResponseFormat = "url" }); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + var imageContent = result[0]; + Assert.NotNull(imageContent); + Assert.False(imageContent.CanRead); + Assert.Equal(new Uri("https://image-url/"), imageContent.Uri); + Assert.NotNull(imageContent.InnerContent); + Assert.IsType(imageContent.InnerContent); + + var breakingGlass = imageContent.InnerContent as GeneratedImage; + Assert.Equal("my prompt", breakingGlass!.RevisedPrompt); + } + public void Dispose() { this._httpClient.Dispose(); diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json new file mode 100644 index 000000000000..e004607fa8f0 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-b64_json-format-response.json @@ -0,0 +1,9 @@ +{ + "created": 1726234481, + "data": [ + { + "b64_json": "iVBORw0KGgoAAA==", + "revised_prompt": "my prompt" + } + ] +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json new file mode 100644 index 000000000000..db96aba8f869 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/text-to-image-response.json @@ -0,0 +1,9 @@ +{ + "created": 1702575371, + "data": [ + { + "revised_prompt": "my prompt", + "url": "https://image-url/" + } + ] +} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml b/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..c3b3af979029 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/CompatibilitySuppressions.xml @@ -0,0 +1,18 @@ + + + + + CP0002 + 
M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + lib/net8.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + true + + + CP0002 + M:Microsoft.SemanticKernel.Connectors.OpenAI.OpenAITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Connectors.OpenAI.dll + true + + \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs index 1cb9c5993eae..7d09f0805bb1 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.TextToImage.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.ClientModel; +using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; using OpenAI.Images; @@ -47,4 +49,111 @@ internal async Task GenerateImageAsync( return generatedImage.ImageUri?.ToString() ?? throw new KernelException("The generated image is not in url format"); } + + /// + /// Generates an image with the provided configuration. + /// + /// Model identifier + /// The input text content to generate the image + /// Execution settings for the image generation + /// Kernel instance + /// Cancellation token + /// List of image generated contents + internal async Task> GetImageContentsAsync( + string targetModel, + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default) + { + // Ensure the input is valid + Verify.NotNull(input); + + // Convert the generic execution settings to OpenAI-specific settings + var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings); + + var imageGenerationOptions = new ImageGenerationOptions() + { + Size = GetGeneratedImageSize(imageSettings.Size), + ResponseFormat = GetResponseFormat(imageSettings.ResponseFormat), + Style = GetGeneratedImageStyle(imageSettings.Style), + Quality = GetGeneratedImageQuality(imageSettings.Quality), + EndUserId = imageSettings.EndUserId, + }; + + ClientResult response = await RunRequestAsync(() => this.Client!.GetImageClient(targetModel).GenerateImageAsync(input.Text, imageGenerationOptions, cancellationToken)).ConfigureAwait(false); + var generatedImage = response.Value; + + List result = []; + if (generatedImage.ImageUri is not null) + { + result.Add(new ImageContent(uri: generatedImage.ImageUri) { InnerContent = generatedImage }); + } + else + { + result.Add(new ImageContent(generatedImage.ImageBytes, "image/png") { InnerContent = generatedImage }); + } + + return result; + } + + private static GeneratedImageSize? GetGeneratedImageSize((int Width, int Height)? size) + => size is null + ? null + : new GeneratedImageSize(size.Value.Width, size.Value.Height); + + private static GeneratedImageQuality? GetGeneratedImageQuality(string? quality) + { + if (quality is null) + { + return null; + } + + return quality.ToUpperInvariant() switch + { + "STANDARD" => GeneratedImageQuality.Standard, + "HIGH" or "HD" => GeneratedImageQuality.High, + _ => throw new NotSupportedException($"The provided quality '{quality}' is not supported.") + }; + } + + private static GeneratedImageStyle? GetGeneratedImageStyle(string? 
style) + { + if (style is null) + { + return null; + } + + return style.ToUpperInvariant() switch + { + "VIVID" => GeneratedImageStyle.Vivid, + "NATURAL" => GeneratedImageStyle.Natural, + _ => throw new NotSupportedException($"The provided style '{style}' is not supported.") + }; + } + + private static GeneratedImageFormat? GetResponseFormat(object? responseFormat) + { + if (responseFormat is null) + { + return null; + } + + if (responseFormat is GeneratedImageFormat format) + { + return format; + } + + if (responseFormat is string formatString) + { + return formatString.ToUpperInvariant() switch + { + "URI" or "URL" => GeneratedImageFormat.Uri, + "BYTES" or "B64_JSON" => GeneratedImageFormat.Bytes, + _ => throw new NotSupportedException($"The provided response format '{formatString}' is not supported.") + }; + } + + throw new NotSupportedException($"The provided response format type '{responseFormat.GetType()}' is not supported."); + } } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs index 3b882e58679e..55bc3d15aad1 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.cs @@ -211,6 +211,12 @@ private static OpenAIClientOptions GetOpenAIClientOptions(HttpClient? httpClient return options; } + /// + /// Gets the model identifier to use for the client. + /// + protected virtual string GetClientModelId() + => this.ModelId; + /// /// Invokes the specified request and handles exceptions. 
/// diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs index f51e7d7c0141..4967d87228ff 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Services/OpenAITextToImageService.cs @@ -36,10 +36,14 @@ public OpenAITextToImageService( HttpClient? httpClient = null, ILoggerFactory? loggerFactory = null) { - this._client = new(modelId, apiKey, organization, null, httpClient, loggerFactory?.CreateLogger(this.GetType())); + this._client = new(modelId ?? "dall-e-2", apiKey, organization, null, httpClient, loggerFactory?.CreateLogger(this.GetType())); } /// - public Task GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default) - => this._client.GenerateImageAsync(this._client.ModelId, description, width, height, cancellationToken); + public Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, + Kernel? kernel = null, + CancellationToken cancellationToken = default) + => this._client.GetImageContentsAsync(this._client.ModelId, input, executionSettings, kernel, cancellationToken); } diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs new file mode 100644 index 000000000000..13e8a6b74b1f --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAITextToImageExecutionSettings.cs @@ -0,0 +1,201 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Collections.Generic; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.SemanticKernel.Text; +using OpenAI.Images; + +namespace Microsoft.SemanticKernel.Connectors.OpenAI; + +/// +/// Text to image execution settings for an OpenAI image generation request. +/// +[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] +public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings +{ + /// + /// Optional width and height of the generated image. + /// + /// + /// + /// Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2 model. + /// Must be one of 1024x1024, 1792x1024, 1024x1792 for dall-e-3 model. + /// + /// + public (int Width, int Height)? Size + { + get => this._size; + + set + { + this.ThrowIfFrozen(); + this._size = value; + } + } + + /// + /// The quality of the image that will be generated. + /// + /// + /// Must be one of standard or hd or high. + /// + /// standard: creates images with standard quality. This is the default. + /// hd OR high: creates images with finer details and greater consistency. + /// + /// This param is only supported for dall-e-3 model. + /// + [JsonPropertyName("quality")] + public string? Quality + { + get => this._quality; + + set + { + this.ThrowIfFrozen(); + this._quality = value; + } + } + + /// + /// The style of the generated images. + /// + /// + /// Must be one of vivid or natural. + /// + /// vivid: causes the model to lean towards generating hyper-real and dramatic images. + /// natural: causes the model to produce more natural, less hyper-real looking images. + /// + /// This param is only supported for dall-e-3 model. + /// + [JsonPropertyName("style")] + public string? Style + { + get => this._style; + + set + { + this.ThrowIfFrozen(); + this._style = value; + } + } + + /// + /// The format of the generated images. 
+ /// Can be a GeneratedImageFormat or a string where: + /// + /// GeneratedImageFormat: causes the model to generate in the provided format + /// url OR uri: causes the model to return a URL for the generated images. + /// b64_json or bytes: causes the model to return in a Base64 format the content of the images. + /// + /// + [JsonPropertyName("response_format")] + public object? ResponseFormat + { + get => this._responseFormat; + set + { + this.ThrowIfFrozen(); + this._responseFormat = value; + } + } + + /// + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// + [JsonPropertyName("user")] + public string? EndUserId + { + get => this._endUserId; + set + { + this.ThrowIfFrozen(); + this._endUserId = value; + } + } + + /// + public override void Freeze() + { + if (this.IsFrozen) + { + return; + } + + base.Freeze(); + } + + /// + public override PromptExecutionSettings Clone() + { + // Copy every option declared on this type so the clone does not silently drop Quality, Style, ResponseFormat or EndUserId. + return new OpenAITextToImageExecutionSettings() + { + ModelId = this.ModelId, + ExtensionData = this.ExtensionData is not null ? new Dictionary(this.ExtensionData) : null, + Size = this.Size, + Quality = this.Quality, + Style = this.Style, + ResponseFormat = this.ResponseFormat, + EndUserId = this.EndUserId + }; + } + + /// + /// Create a new settings object with the values from another settings object. + /// + /// Execution settings to convert + /// An instance of OpenAITextToImageExecutionSettings + public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings) + { + if (executionSettings is null) + { + return new OpenAITextToImageExecutionSettings(); + } + + if (executionSettings is OpenAITextToImageExecutionSettings settings) + { + return settings; + } + + var json = JsonSerializer.Serialize(executionSettings); + var openAIExecutionSettings = JsonSerializer.Deserialize(json, JsonOptionsCache.ReadPermissive)!; + if (openAIExecutionSettings.ExtensionData?.TryGetValue("width", out var width) ?? 
false) + { + openAIExecutionSettings.Width = ((JsonElement)width).GetInt32(); + } + if (openAIExecutionSettings.ExtensionData?.TryGetValue("height", out var height) ?? false) + { + openAIExecutionSettings.Height = ((JsonElement)height).GetInt32(); + } + + return openAIExecutionSettings!; + } + + #region private ================================================================================ + + [JsonPropertyName("width")] + internal int? Width + { + get => this.Size?.Width; + set + { + if (!value.HasValue) { return; } + this.Size = (value.Value, this.Size?.Height ?? 0); + } + } + + [JsonPropertyName("height")] + internal int? Height + { + get => this.Size?.Height; + set + { + if (!value.HasValue) { return; } + this.Size = (this.Size?.Width ?? 0, value.Value); + } + } + + private (int Width, int Height)? _size; + private string? _quality; + private string? _style; + private object? _responseFormat; + private string? _endUserId; + + #endregion +} diff --git a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs index 4b2b65dd5417..ac3f6d020c55 100644 --- a/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/AzureOpenAI/AzureOpenAITextToImageTests.cs @@ -4,10 +4,13 @@ using Azure.Identity; using Microsoft.Extensions.Configuration; using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.TextToImage; using SemanticKernel.IntegrationTests.TestSettings; using Xunit; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.IntegrationTests.Connectors.AzureOpenAI; public sealed class AzureOpenAITextToImageTests @@ -42,4 +45,30 @@ public async Task ItCanReturnImageUrlAsync() Assert.NotNull(result); Assert.StartsWith("https://", result); } + + [Fact] + public async Task 
GetImageContentsCanReturnImageUrlAsync() + { + // Arrange + AzureOpenAIConfiguration? configuration = this._configuration.GetSection("AzureOpenAITextToImage").Get(); + Assert.NotNull(configuration); + + var kernel = Kernel.CreateBuilder() + .AddAzureOpenAITextToImage( + deploymentName: configuration.DeploymentName, + endpoint: configuration.Endpoint, + credentials: new AzureCliCredential()) + .Build(); + + var service = kernel.GetRequiredService(); + + // Act + var result = await service.GetImageContentsAsync("The sun rises in the east and sets in the west.", new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + + // Assert + Assert.NotNull(result); + Assert.NotEmpty(result); + Assert.NotEmpty(result[0].Uri!.ToString()); + Assert.StartsWith("https://", result[0].Uri!.ToString()); + } } diff --git a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs index 85512760dcd0..07524b592973 100644 --- a/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/OpenAI/OpenAITextToImageTests.cs @@ -3,10 +3,13 @@ using System.Threading.Tasks; using Microsoft.Extensions.Configuration; using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.TextToImage; using SemanticKernel.IntegrationTests.TestSettings; using Xunit; +#pragma warning disable CS0618 // Type or member is obsolete + namespace SemanticKernel.IntegrationTests.Connectors.OpenAI; public sealed class OpenAITextToImageTests { @@ -48,7 +51,7 @@ public async Task OpenAITextToImageUseDallE2ByDefaultAsync() Assert.NotNull(openAIConfiguration); var kernel = Kernel.CreateBuilder() - .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey, modelId: null) + .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey) .Build(); var service = kernel.GetRequiredService(); @@ -60,4 +63,26 @@ public async Task 
OpenAITextToImageUseDallE2ByDefaultAsync() Assert.NotNull(result); Assert.NotEmpty(result); } + + [Fact] + public async Task OpenAITextToImageDalle3GetImagesTestAsync() + { + // Arrange + OpenAIConfiguration? openAIConfiguration = this._configuration.GetSection("OpenAITextToImage").Get(); + Assert.NotNull(openAIConfiguration); + + var kernel = Kernel.CreateBuilder() + .AddOpenAITextToImage(apiKey: openAIConfiguration.ApiKey, modelId: "dall-e-3") + .Build(); + + var service = kernel.GetRequiredService(); + + // Act + var result = await service.GetImageContentsAsync("The sun rises in the east and sets in the west.", new OpenAITextToImageExecutionSettings { Size = (1024, 1024) }); + + // Assert + Assert.NotNull(result); + Assert.NotEmpty(result); + Assert.NotEmpty(result[0].Uri!.ToString()); + } } diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs index 7370a6eb38ef..3eb2d890aa54 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/ITextToImageService.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Threading; using System.Threading.Tasks; @@ -14,19 +15,17 @@ namespace Microsoft.SemanticKernel.TextToImage; public interface ITextToImageService : IAIService { /// - /// Generate an image matching the given description + /// Given a prompt and/or an input text, the model will generate a new image. /// - /// Image generation prompt - /// Image width in pixels - /// Image height in pixels + /// Input text for image generation + /// Text to image execution settings /// The containing services, plugins, and other state for use throughout the operation. /// The to monitor for cancellation requests. The default is . 
- /// Generated image in base64 format or image URL + /// Generated image contents [Experimental("SKEXP0001")] - public Task GenerateImageAsync( - string description, - int width, - int height, + public Task> GetImageContentsAsync( + TextContent input, + PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default); } diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs new file mode 100644 index 000000000000..26945f32c4a4 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/AI/TextToImage/TextToImageServiceExtensions.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.SemanticKernel.TextToImage; + +/// +/// Extension methods for . +/// +public static class TextToImageServiceExtensions +{ + /// + /// Given a prompt and/or an input text, the model will generate a new image. + /// + /// Target instance + /// Image generation prompt + /// Image width in pixels + /// Image height in pixels + /// The containing services, plugins, and other state for use throughout the operation. + /// The to monitor for cancellation requests. The default is . + /// Generated image in base64 format or image URL + public static async Task GenerateImageAsync(this ITextToImageService service, + string description, + int width, + int height, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default) + { + var imageJson = $$""" + { + "width": {{width}}, + "height": {{height}} + } + """; + + var executionSettings = JsonSerializer.Deserialize(imageJson); + + var result = await service.GetImageContentsAsync(new TextContent(description), executionSettings, kernel, cancellationToken).ConfigureAwait(false); + + return result[0].Uri!.ToString(); + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml new file mode 100644 index 000000000000..f0c61ea95587 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/CompatibilitySuppressions.xml @@ -0,0 +1,32 @@ + + + + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0002 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GenerateImageAsync(System.String,System.Int32,System.Int32,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0006 + M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + lib/net8.0/Microsoft.SemanticKernel.Abstractions.dll + true + + + CP0006 + 
M:Microsoft.SemanticKernel.TextToImage.ITextToImageService.GetImageContentsAsync(Microsoft.SemanticKernel.TextContent,Microsoft.SemanticKernel.PromptExecutionSettings,Microsoft.SemanticKernel.Kernel,System.Threading.CancellationToken) + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + lib/netstandard2.0/Microsoft.SemanticKernel.Abstractions.dll + true + + \ No newline at end of file diff --git a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs index b8c3867ff358..558ab739d279 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Contents/TextContent.cs @@ -57,4 +57,13 @@ public override string ToString() { return this.Text ?? string.Empty; } + + /// + /// When converting a string to a , the content is automatically set to the string value. + /// + /// Text content + public static implicit operator TextContent(string text) + { + return new TextContent(text); + } }