Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

.Net: Improve the ITextToImageService abstractions to support ExecutionSettings #8068

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1f567f3
Improving the ITextToImageService to support ExecutionSettings, Image…
RogerBarreto Aug 9, 2024
0af318e
Merge branch 'microsoft:main' into issues/5657-itexttoimage-update
RogerBarreto Aug 12, 2024
201423b
Add quality and style to TextToImageRequest
aghimir3 Aug 14, 2024
6895c05
Implement GetImageContentsAsync in OpenAITextToImageService
aghimir3 Aug 14, 2024
ef12678
Add unit tests for GetImageContentsAsync method
aghimir3 Aug 14, 2024
9a47d7c
Merge pull request #4 from aghimir3/issues/5657-itexttoimage-ag
RogerBarreto Aug 14, 2024
f58e689
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 13, 2024
36d4fb9
Adding support for Text-To-Image Settings
RogerBarreto Sep 13, 2024
b9e40b4
Removed unecessary parameter
RogerBarreto Sep 13, 2024
23e2f03
Fix Warnings
RogerBarreto Sep 16, 2024
3e0c967
Add missing xmldocs and small changes
RogerBarreto Sep 16, 2024
e7f87eb
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 20, 2024
17c1492
GenerateImage extension method UT working
RogerBarreto Sep 23, 2024
2141016
Updating ITs
RogerBarreto Sep 23, 2024
dc7f8ef
Address extra folder
RogerBarreto Sep 23, 2024
373870e
Fix warnings
RogerBarreto Sep 24, 2024
de0c4f2
Fix warnings
RogerBarreto Sep 24, 2024
a783563
Update Obsolete Concepts
RogerBarreto Sep 24, 2024
34da6a7
Fix warning
RogerBarreto Sep 24, 2024
7f5ec3b
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 27, 2024
f0c95cd
Adding suppressions were needed
RogerBarreto Sep 27, 2024
1681cd4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 27, 2024
e2177c4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 30, 2024
31b6d8c
Address PR comments
RogerBarreto Sep 30, 2024
3ea6fab
Address UT and IT
RogerBarreto Sep 30, 2024
af81e60
Adding extension method usage concepts
RogerBarreto Sep 30, 2024
a459d54
Adding missing MS header
RogerBarreto Sep 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 43 additions & 24 deletions dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Microsoft.Extensions.Http.Resilience;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToImage;

namespace TextToImage;
Expand All @@ -12,7 +13,7 @@ namespace TextToImage;
public class OpenAI_TextToImageDalle3(ITestOutputHelper output) : BaseTest(output)
{
[Fact]
public async Task OpenAIDallEAsync()
public async Task OpenAIDallE2Async()
{
Console.WriteLine("======== OpenAI DALL-E 2 Text To Image ========");

Expand All @@ -24,8 +25,8 @@ public async Task OpenAIDallEAsync()
ITextToImageService dallE = kernel.GetRequiredService<ITextToImageService>();

var imageDescription = "A cute baby sea otter";
var image = await dallE.GenerateImageAsync(imageDescription, 256, 256);

var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (256, 256) });
var image = images[0].Uri!.ToString();
Console.WriteLine(imageDescription);
Console.WriteLine("Image URL: " + image);

Expand All @@ -51,7 +52,8 @@ A cute baby sea otter

var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
image = await dallE.GenerateImageAsync(reply.Content!, 256, 256);
images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) });
image = images[0].Uri!.ToString();
Console.WriteLine("Bot: " + image);
Console.WriteLine("Img description: " + reply);

Expand All @@ -61,7 +63,8 @@ A cute baby sea otter

reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
image = await dallE.GenerateImageAsync(reply.Content!, 256, 256);
images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (256, 256) });
image = images[0].Uri!.ToString();
Console.WriteLine("Bot: " + image);
Console.WriteLine("Img description: " + reply);

Expand All @@ -78,46 +81,60 @@ A cute baby sea otter
*/
}

[Fact(Skip = "Generating the Image can take too long and often break the test")]
public async Task AzureOpenAIDallEAsync()
/// <summary>
/// Minimal text-to-image sample: registers the Azure OpenAI text-to-image service, asks it for a
/// 1792x1024 image of a fixed prompt and writes the URI of the first returned image to the test output.
/// </summary>
[Fact]
public async Task SimpleTextToImageExampleAsync()
{
    // Register the service and build the kernel in a single fluent chain.
    var kernel = Kernel.CreateBuilder()
        .AddAzureOpenAITextToImage( // Add your text to image service
            deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName,
            endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint,
            apiKey: TestConfiguration.AzureOpenAI.ImageApiKey,
            modelId: TestConfiguration.AzureOpenAI.ImageModelId)
        .Build();

    var textToImage = kernel.GetRequiredService<ITextToImageService>();

    // The prompt is passed as TextContent and the size through the OpenAI-specific execution settings.
    var prompt = new TextContent("A cute baby sea otter");
    var settings = new OpenAITextToImageExecutionSettings { Size = (Width: 1792, Height: 1024) };

    var generatedImages = await textToImage.GetImageContentsAsync(prompt, settings);

    // Only the first returned image is reported.
    this.Output.WriteLine(generatedImages[0].Uri!.ToString());
}

[Fact]
public async Task OpenAIDallE3Async()
{
Console.WriteLine("========Azure OpenAI DALL-E 3 Text To Image ========");
Console.WriteLine("======== OpenAI DALL-E 3 Text To Image ========");

var builder = Kernel.CreateBuilder()
.AddAzureOpenAITextToImage( // Add your text to image service
deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName,
endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint,
apiKey: TestConfiguration.AzureOpenAI.ImageApiKey,
modelId: TestConfiguration.AzureOpenAI.ImageModelId,
apiVersion: "2024-02-15-preview") //DALL-E 3 is only supported in this version
.AddAzureOpenAIChatCompletion( // Add your chat completion service
deploymentName: TestConfiguration.AzureOpenAI.ChatDeploymentName,
endpoint: TestConfiguration.AzureOpenAI.Endpoint,
apiKey: TestConfiguration.AzureOpenAI.ApiKey);
.AddOpenAITextToImage( // Add your text to image service
modelId: "dall-e-3",
apiKey: TestConfiguration.OpenAI.ApiKey) //DALL-E 3 is only supported in this version
.AddOpenAIChatCompletion( // Add your chat completion service
modelId: TestConfiguration.OpenAI.ChatModelId,
apiKey: TestConfiguration.OpenAI.ApiKey);

builder.Services.ConfigureHttpClientDefaults(c =>
{
// Use a standard resiliency policy, augmented to retry 5 times
c.AddStandardResilienceHandler().Configure(o =>
{
o.Retry.MaxRetryAttempts = 5;
o.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(60);
o.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(120);
});
});

var kernel = builder.Build();

ITextToImageService dallE = kernel.GetRequiredService<ITextToImageService>();
var imageDescription = "A cute baby sea otter";
var image = await dallE.GenerateImageAsync(imageDescription, 1024, 1024);
var images = await dallE.GetImageContentsAsync(imageDescription, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) });

Console.WriteLine(imageDescription);
Console.WriteLine("Image URL: " + image);
Console.WriteLine("Image URL: " + images[0].Uri!);

/* Output:

A cute baby sea otter
Image URL: https://dalleproduse.blob.core.windows.net/private/images/....
Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-/....

*/

Expand All @@ -136,7 +153,8 @@ A cute baby sea otter

var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024);
images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) });
var image = images[0].Uri!.ToString();
Console.WriteLine("Bot: " + image);
Console.WriteLine("Img description: " + reply);

Expand All @@ -146,7 +164,8 @@ A cute baby sea otter

reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
image = await dallE.GenerateImageAsync(reply.Content!, 1024, 1024);
images = await dallE.GetImageContentsAsync(reply.Content!, new OpenAITextToImageExecutionSettings { Size = (1024, 1024) });
image = images[0].Uri!.ToString();
Console.WriteLine("Bot: " + image);
Console.WriteLine("Img description: " + reply);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Threading.Tasks;
using Azure.AI.OpenAI;
using Azure.Core;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Services;
using Microsoft.SemanticKernel.TextToImage;
using Moq;
using OpenAI.Images;

#pragma warning disable CS0618 // Type or member is obsolete

namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Services;

Expand All @@ -30,7 +36,7 @@ public AzureOpenAITextToImageServiceTests()
{
ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
{
Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt"))
Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json"))
}
};
this._httpClient = new HttpClient(this._messageHandlerStub, false);
Expand Down Expand Up @@ -80,7 +86,7 @@ public async Task GenerateImageWorksCorrectlyAsync(int width, int height, string
Assert.NotNull(request);
Assert.Equal("description", request["prompt"]?.ToString());
Assert.Equal("deployment", request["model"]?.ToString());
Assert.Equal("url", request["response_format"]?.ToString());
Assert.Null(request["response_format"]);
Assert.Equal($"{width}x{height}", request["size"]?.ToString());
}

Expand Down Expand Up @@ -143,6 +149,191 @@ public void ItShouldThrowExceptionIfNoEndpointProvided(bool useTokeCredential, s
}
}

/// <summary>
/// Checks how <c>ResponseFormat</c> from <see cref="OpenAITextToImageExecutionSettings"/> ends up in the
/// outgoing request: each accepted string alias or <see cref="GeneratedImageFormat"/> value must be sent
/// as the expected <c>response_format</c> value, and the property must be absent when no format is set.
/// </summary>
/// <param name="responseFormatOption">String alias, or a "GeneratedImage.*" marker that is mapped to the typed value.</param>
/// <param name="expectedResponseFormat">Expected <c>response_format</c> value, or <c>null</c> if it must not be sent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("uri", "url")]
[InlineData("url", "url")]
[InlineData("GeneratedImage.Uri", "url")]
[InlineData("bytes", "b64_json")]
[InlineData("b64_json", "b64_json")]
[InlineData("GeneratedImage.Bytes", "b64_json")]
public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat)
{
    // Arrange
    // InlineData only carries strings here; the "GeneratedImage.*" markers are translated to the typed values.
    object? responseFormatObject = responseFormatOption switch
    {
        "GeneratedImage.Uri" => GeneratedImageFormat.Uri,
        "GeneratedImage.Bytes" => GeneratedImageFormat.Bytes,
        _ => responseFormatOption
    };
    var settings = new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormatObject };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var tokenCredential = new Mock<TokenCredential>().Object;
    var service = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: tokenCredential, "dall-e-3", this._httpClient);

    // Act
    var images = await service.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(images);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var payload = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedResponseFormat is null)
    {
        // When no response format is provided, it should not be included in the request body
        Assert.DoesNotContain("response_format", payload);
        return;
    }

    Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", payload);
}

/// <summary>
/// Checks that the <c>Quality</c> execution setting is sent as the expected <c>quality</c> value in the
/// request body (for example "high" is expected to be sent as "hd"), and that it is omitted when not set.
/// </summary>
/// <param name="quality">Quality value placed on the execution settings.</param>
/// <param name="expectedQuality">Expected <c>quality</c> value in the request, or <c>null</c> if it must not be sent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("hd", "hd")]
[InlineData("high", "hd")]
[InlineData("standard", "standard")]
public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? expectedQuality)
{
    // Arrange
    var settings = new OpenAITextToImageExecutionSettings { Quality = quality };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var tokenCredential = new Mock<TokenCredential>().Object;
    var service = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: tokenCredential, "dall-e-3", this._httpClient);

    // Act
    var images = await service.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(images);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var payload = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedQuality is null)
    {
        // When no quality is provided, it should not be included in the request body
        Assert.DoesNotContain("quality", payload);
        return;
    }

    Assert.Contains($"\"quality\":\"{expectedQuality}\"", payload);
}

/// <summary>
/// Checks that the <c>Style</c> execution setting is forwarded as the <c>style</c> value of the request
/// body, and that the property is omitted when no style is set.
/// </summary>
/// <param name="style">Style value placed on the execution settings.</param>
/// <param name="expectedStyle">Expected <c>style</c> value in the request, or <c>null</c> if it must not be sent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("vivid", "vivid")]
[InlineData("natural", "natural")]
public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle)
{
    // Arrange
    var settings = new OpenAITextToImageExecutionSettings { Style = style };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var tokenCredential = new Mock<TokenCredential>().Object;
    var service = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: tokenCredential, "dall-e-3", this._httpClient);

    // Act
    var images = await service.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(images);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var payload = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedStyle is null)
    {
        // When no style is provided, it should not be included in the request body
        Assert.DoesNotContain("style", payload);
        return;
    }

    Assert.Contains($"\"style\":\"{expectedStyle}\"", payload);
}

/// <summary>
/// Checks that the <c>Size</c> execution setting is sent as a "{width}x{height}" <c>size</c> value in the
/// request body, and that the property is omitted when no size is set.
/// </summary>
/// <param name="width">Requested width; a size is only set when both width and height are given.</param>
/// <param name="height">Requested height; a size is only set when both width and height are given.</param>
/// <param name="expectedSize">Expected <c>size</c> value in the request, or <c>null</c> if it must not be sent.</param>
[Theory]
[InlineData(null, null, null)]
[InlineData(1, 2, "1x2")]
public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? width, int? height, string? expectedSize)
{
    // Arrange
    var settings = new OpenAITextToImageExecutionSettings
    {
        // A size is only supplied when both dimensions are present; otherwise it stays null.
        Size = width is int w && height is int h
            ? (w, h)
            : null
    };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var tokenCredential = new Mock<TokenCredential>().Object;
    var service = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: tokenCredential, "dall-e-3", this._httpClient);

    // Act
    var images = await service.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(images);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var payload = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedSize is null)
    {
        // When no size is provided, it should not be included in the request body
        Assert.DoesNotContain("size", payload);
        return;
    }

    Assert.Contains($"\"size\":\"{expectedSize}\"", payload);
}

/// <summary>
/// Checks the shape of the result when the service is asked for the <c>b64_json</c> response format:
/// exactly one image is returned, its content is readable, its mime type is "image/png" and the
/// underlying <see cref="GeneratedImage"/> is exposed through <c>InnerContent</c> with the revised prompt.
/// </summary>
[Fact]
public async Task GetByteImageContentsResponseWorksCorrectlyAsync()
{
    // Arrange
    // Replace the stubbed response with the b64_json test payload for this test only.
    this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
    {
        Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json"))
    };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" });

    // Assert
    Assert.NotNull(result);

    // Assert.Single returns the sole element, so no separate index lookup is needed.
    var imageContent = Assert.Single(result);
    Assert.NotNull(imageContent);
    Assert.True(imageContent.CanRead);
    Assert.Equal("image/png", imageContent.MimeType);
    Assert.NotNull(imageContent.InnerContent);

    // Assert.IsType returns the value already cast, avoiding an unchecked `as` cast plus `!`.
    var breakingGlass = Assert.IsType<GeneratedImage>(imageContent.InnerContent);
    Assert.Equal("my prompt", breakingGlass.RevisedPrompt);
}

/// <summary>
/// Checks the shape of the result when the service is asked for the <c>url</c> response format:
/// exactly one image is returned, it carries the URI "https://image-url/" rather than readable content,
/// and the underlying <see cref="GeneratedImage"/> is exposed through <c>InnerContent</c> with the revised prompt.
/// </summary>
[Fact]
public async Task GetUrlImageContentsResponseWorksCorrectlyAsync()
{
    // Arrange
    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "url" });

    // Assert
    Assert.NotNull(result);

    // Assert.Single returns the sole element, so no separate index lookup is needed.
    var imageContent = Assert.Single(result);
    Assert.NotNull(imageContent);
    Assert.False(imageContent.CanRead);
    Assert.Equal(new Uri("https://image-url/"), imageContent.Uri);
    Assert.NotNull(imageContent.InnerContent);

    // Assert.IsType returns the value already cast, avoiding an unchecked `as` cast plus `!`.
    var breakingGlass = Assert.IsType<GeneratedImage>(imageContent.InnerContent);
    Assert.Equal("my prompt", breakingGlass.RevisedPrompt);
}

public void Dispose()
{
this._httpClient.Dispose();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Connectors.OpenAI;

#pragma warning disable CS0618 // Type or member is obsolete

namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Settings;

/// <summary>
Expand Down Expand Up @@ -242,9 +244,8 @@ public void FromExecutionSettingsWithDataDoesNotIncludeEmptyStopSequences()
var executionSettings = new AzureOpenAIPromptExecutionSettings { StopSequences = [] };

// Act
#pragma warning disable CS0618 // AzureOpenAIChatCompletionWithData is deprecated in favor of OpenAIPromptExecutionSettings.AzureChatExtensionsOptions
var executionSettingsWithData = AzureOpenAIPromptExecutionSettings.FromExecutionSettingsWithData(executionSettings);
#pragma warning restore CS0618

// Assert
Assert.Null(executionSettingsWithData.StopSequences);
}
Expand Down
Loading
Loading