Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

.Net: Improve the ITextToImageService abstractions to support ExecutionSettings #8068

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1f567f3
Improving the ITextToImageService to support ExecutionSettings, Image…
RogerBarreto Aug 9, 2024
0af318e
Merge branch 'microsoft:main' into issues/5657-itexttoimage-update
RogerBarreto Aug 12, 2024
201423b
Add quality and style to TextToImageRequest
aghimir3 Aug 14, 2024
6895c05
Implement GetImageContentsAsync in OpenAITextToImageService
aghimir3 Aug 14, 2024
ef12678
Add unit tests for GetImageContentsAsync method
aghimir3 Aug 14, 2024
9a47d7c
Merge pull request #4 from aghimir3/issues/5657-itexttoimage-ag
RogerBarreto Aug 14, 2024
f58e689
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 13, 2024
36d4fb9
Adding support for Text-To-Image Settings
RogerBarreto Sep 13, 2024
b9e40b4
Removed unnecessary parameter
RogerBarreto Sep 13, 2024
23e2f03
Fix Warnings
RogerBarreto Sep 16, 2024
3e0c967
Add missing xmldocs and small changes
RogerBarreto Sep 16, 2024
e7f87eb
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 20, 2024
17c1492
GenerateImage extension method UT working
RogerBarreto Sep 23, 2024
2141016
Updating ITs
RogerBarreto Sep 23, 2024
dc7f8ef
Address extra folder
RogerBarreto Sep 23, 2024
373870e
Fix warnings
RogerBarreto Sep 24, 2024
de0c4f2
Fix warnings
RogerBarreto Sep 24, 2024
a783563
Update Obsolete Concepts
RogerBarreto Sep 24, 2024
34da6a7
Fix warning
RogerBarreto Sep 24, 2024
7f5ec3b
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 27, 2024
f0c95cd
Adding suppressions where needed
RogerBarreto Sep 27, 2024
1681cd4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 27, 2024
e2177c4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 30, 2024
31b6d8c
Address PR comments
RogerBarreto Sep 30, 2024
3ea6fab
Address UT and IT
RogerBarreto Sep 30, 2024
af81e60
Adding extension method usage concepts
RogerBarreto Sep 30, 2024
a459d54
Adding missing MS header
RogerBarreto Sep 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Microsoft.Extensions.Http.Resilience;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToImage;

namespace TextToImage;
Expand Down Expand Up @@ -78,6 +79,24 @@ A cute baby sea otter
*/
}

/// <summary>
/// Minimal sample: resolves the registered <see cref="ITextToImageService"/> from a kernel and
/// requests a 1792x1024 image for a fixed prompt, then writes the first image's URI to the output.
/// </summary>
[Fact]
public async Task SimpleTextToImageExampleAsync()
{
    // Register the text-to-image service and build the kernel in a single chain.
    var kernel = Kernel.CreateBuilder()
        .AddAzureOpenAITextToImage( // Add your text to image service
            deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName,
            endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint,
            apiKey: TestConfiguration.AzureOpenAI.ImageApiKey,
            modelId: TestConfiguration.AzureOpenAI.ImageModelId)
        .Build();

    var textToImageService = kernel.GetRequiredService<ITextToImageService>();

    // The image size is supplied through the execution settings.
    var prompt = new TextContent("A cute baby sea otter");
    var settings = new OpenAITextToImageExecutionSettings { Size = (Width: 1792, Height: 1024) };

    var generatedImages = await textToImageService.GetImageContentsAsync(prompt, settings);

    // The sample expects the first result to carry a URI (hence the null-forgiving operator).
    var firstImageUri = generatedImages[0].Uri!;
    this.Output.WriteLine(firstImageUri.ToString());
}

[Fact(Skip = "Generating the Image can take too long and often break the test")]
public async Task AzureOpenAIDallEAsync()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Threading.Tasks;
using Azure.AI.OpenAI;
using Azure.Core;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Services;
using Moq;
using OpenAI.Images;

namespace SemanticKernel.Connectors.AzureOpenAI.UnitTests.Services;

Expand All @@ -30,7 +33,7 @@ public AzureOpenAITextToImageServiceTests()
{
ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
{
Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.txt"))
Content = new StringContent(File.ReadAllText("./TestData/text-to-image-response.json"))
}
};
this._httpClient = new HttpClient(this._messageHandlerStub, false);
Expand Down Expand Up @@ -143,6 +146,191 @@ public void ItShouldThrowExceptionIfNoEndpointProvided(bool useTokeCredential, s
}
}

/// <summary>
/// Verifies how the <c>ResponseFormat</c> execution setting is written to the request body.
/// The option may be given as a string or as a <see cref="GeneratedImageFormat"/> value; when it is
/// <see langword="null"/>, the request body must not mention <c>response_format</c> at all.
/// </summary>
/// <param name="responseFormatOption">Textual option, or a marker naming a <see cref="GeneratedImageFormat"/> member.</param>
/// <param name="expectedResponseFormat">Value expected for <c>response_format</c> in the request, or <see langword="null"/> when it must be absent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("uri", "url")]
[InlineData("url", "url")]
[InlineData("GeneratedImage.Uri", "url")]
[InlineData("bytes", "b64_json")]
[InlineData("b64_json", "b64_json")]
[InlineData("GeneratedImage.Bytes", "b64_json")]
public async Task GetUriImageContentsResponseFormatRequestWorksCorrectlyAsync(string? responseFormatOption, string? expectedResponseFormat)
{
    // Arrange
    // InlineData cannot carry GeneratedImageFormat values, so marker strings are translated here.
    object? responseFormat;
    switch (responseFormatOption)
    {
        case "GeneratedImage.Uri":
            responseFormat = GeneratedImageFormat.Uri;
            break;
        case "GeneratedImage.Bytes":
            responseFormat = GeneratedImageFormat.Bytes;
            break;
        default:
            responseFormat = responseFormatOption;
            break;
    }

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);
    var settings = new OpenAITextToImageExecutionSettings { ResponseFormat = responseFormat };

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(result);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var requestJson = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedResponseFormat is null)
    {
        // When no response format is provided, it should not be included in the request body
        Assert.DoesNotContain("response_format", requestJson);
    }
    else
    {
        Assert.Contains($"\"response_format\":\"{expectedResponseFormat}\"", requestJson);
    }
}

/// <summary>
/// Verifies how the <c>Quality</c> execution setting is written to the request body.
/// When it is <see langword="null"/>, the request body must not mention <c>quality</c>.
/// </summary>
/// <param name="quality">Quality value passed through the execution settings.</param>
/// <param name="expectedQuality">Value expected for <c>quality</c> in the request, or <see langword="null"/> when it must be absent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("hd", "hd")]
[InlineData("high", "hd")]
[InlineData("standard", "standard")]
public async Task GetUriImageContentsImageQualityRequestWorksCorrectlyAsync(string? quality, string? expectedQuality)
{
    // Arrange
    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);
    var settings = new OpenAITextToImageExecutionSettings { Quality = quality };

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(result);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var requestJson = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedQuality is null)
    {
        // When no quality is provided, it should not be included in the request body
        Assert.DoesNotContain("quality", requestJson);
    }
    else
    {
        Assert.Contains($"\"quality\":\"{expectedQuality}\"", requestJson);
    }
}

/// <summary>
/// Verifies how the <c>Style</c> execution setting is written to the request body.
/// When it is <see langword="null"/>, the request body must not mention <c>style</c>.
/// </summary>
/// <param name="style">Style value passed through the execution settings.</param>
/// <param name="expectedStyle">Value expected for <c>style</c> in the request, or <see langword="null"/> when it must be absent.</param>
[Theory]
[InlineData(null, null)]
[InlineData("vivid", "vivid")]
[InlineData("natural", "natural")]
public async Task GetUriImageContentsImageStyleRequestWorksCorrectlyAsync(string? style, string? expectedStyle)
{
    // Arrange
    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);
    var settings = new OpenAITextToImageExecutionSettings { Style = style };

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(result);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var requestJson = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedStyle is null)
    {
        // When no style is provided, it should not be included in the request body
        Assert.DoesNotContain("style", requestJson);
    }
    else
    {
        Assert.Contains($"\"style\":\"{expectedStyle}\"", requestJson);
    }
}

/// <summary>
/// Verifies how the <c>Size</c> execution setting is written to the request body as
/// <c>"{width}x{height}"</c>. When either dimension is missing no size is set, and the
/// request body must not mention <c>size</c>.
/// </summary>
/// <param name="width">Requested width, or <see langword="null"/>.</param>
/// <param name="height">Requested height, or <see langword="null"/>.</param>
/// <param name="expectedSize">Value expected for <c>size</c> in the request, or <see langword="null"/> when it must be absent.</param>
[Theory]
[InlineData(null, null, null)]
[InlineData(1, 2, "1x2")]
public async Task GetUriImageContentsImageSizeRequestWorksCorrectlyAsync(int? width, int? height, string? expectedSize)
{
    // Arrange
    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);

    // Only set a size when both dimensions were supplied.
    var settings = new OpenAITextToImageExecutionSettings
    {
        Size = width is int w && height is int h
            ? (w, h)
            : null
    };

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", settings);

    // Assert
    Assert.NotNull(result);
    Assert.NotNull(this._messageHandlerStub.RequestContent);

    var requestJson = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent);
    if (expectedSize is null)
    {
        // When no size is provided, it should not be included in the request body
        Assert.DoesNotContain("size", requestJson);
    }
    else
    {
        Assert.Contains($"\"size\":\"{expectedSize}\"", requestJson);
    }
}

/// <summary>
/// Verifies that a <c>b64_json</c> response is surfaced as a single readable image content with
/// the <c>image/png</c> MIME type, and that the underlying <see cref="GeneratedImage"/> is exposed
/// through <c>InnerContent</c> with its revised prompt.
/// </summary>
[Fact]
public async Task GetByteImageContentsResponseWorksCorrectlyAsync()
{
    // Arrange
    // Replace the stubbed response with the b64_json test data file for this test only.
    this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
    {
        Content = new StringContent(File.ReadAllText("./TestData/text-to-image-b64_json-format-response.json"))
    };

    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "b64_json" });

    // Assert
    Assert.NotNull(result);
    var imageContent = Assert.Single(result);
    Assert.NotNull(imageContent);
    Assert.True(imageContent.CanRead);
    Assert.Equal("image/png", imageContent.MimeType);
    Assert.NotNull(imageContent.InnerContent);

    // Assert.IsType returns the typed instance, so no separate `as` cast or null-forgiving operator is needed.
    var generatedImage = Assert.IsType<GeneratedImage>(imageContent.InnerContent);
    Assert.Equal("my prompt", generatedImage.RevisedPrompt);
}

/// <summary>
/// Verifies that a <c>url</c> response is surfaced as a single image content that carries the
/// image URI (and is not readable), and that the underlying <see cref="GeneratedImage"/> is
/// exposed through <c>InnerContent</c> with its revised prompt.
/// </summary>
[Fact]
public async Task GetUrlImageContentsResponseWorksCorrectlyAsync()
{
    // Arrange
    this._httpClient.BaseAddress = new Uri("https://api-host");
    var sut = new AzureOpenAITextToImageService("deployment", endpoint: null!, credential: new Mock<TokenCredential>().Object, "dall-e-3", this._httpClient);

    // Act
    var result = await sut.GetImageContentsAsync("my prompt", new OpenAITextToImageExecutionSettings { ResponseFormat = "url" });

    // Assert
    Assert.NotNull(result);
    var imageContent = Assert.Single(result);
    Assert.NotNull(imageContent);
    Assert.False(imageContent.CanRead);
    Assert.Equal(new Uri("https://image-url/"), imageContent.Uri);
    Assert.NotNull(imageContent.InnerContent);

    // Assert.IsType returns the typed instance, so no separate `as` cast or null-forgiving operator is needed.
    var generatedImage = Assert.IsType<GeneratedImage>(imageContent.InnerContent);
    Assert.Equal("my prompt", generatedImage.RevisedPrompt);
}

public void Dispose()
{
this._httpClient.Dispose();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"created": 1726234481,
"data": [
{
"b64_json": "iVBORw0KGgoAAA==",
"revised_prompt": "my prompt"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"created": 1702575371,
"data": [
{
"revised_prompt": "my prompt",
"url": "https://image-url/"
}
]
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,8 @@ public AzureOpenAITextToImageService(
{
Verify.NotNullOrWhiteSpace(apiKey);

var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri;
if (connectorEndpoint is null)
{
throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided.");
}
var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri)
RogerBarreto marked this conversation as resolved.
Show resolved Hide resolved
?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided.");

var options = AzureClientCore.GetAzureOpenAIClientOptions(
httpClient,
Expand Down Expand Up @@ -87,11 +84,8 @@ public AzureOpenAITextToImageService(
{
Verify.NotNull(credential);

var connectorEndpoint = !string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri;
if (connectorEndpoint is null)
{
throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided.");
}
var connectorEndpoint = (!string.IsNullOrWhiteSpace(endpoint) ? endpoint! : httpClient?.BaseAddress?.AbsoluteUri)
?? throw new ArgumentException($"The {nameof(httpClient)}.{nameof(HttpClient.BaseAddress)} and {nameof(endpoint)} are both null or empty. Please ensure at least one is provided.");

var options = AzureClientCore.GetAzureOpenAIClientOptions(
httpClient,
Expand Down Expand Up @@ -133,4 +127,8 @@ public AzureOpenAITextToImageService(
/// <inheritdoc/>
public Task<string> GenerateImageAsync(string description, int width, int height, Kernel? kernel = null, CancellationToken cancellationToken = default)
{
    // Pure delegation to the underlying client, targeting the client's own deployment name.
    // The kernel argument is not forwarded.
    return this._client.GenerateImageAsync(this._client.DeploymentName, description, width, height, cancellationToken);
}

/// <inheritdoc/>
public Task<IReadOnlyList<ImageContent>> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
{
    // Pure delegation: every argument is handed to the underlying client unchanged.
    return this._client.GetImageContentsAsync(input, executionSettings, kernel, cancellationToken);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@
<None Update="TestData\text-embeddings-response.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\text-to-image-response.txt">
<None Update="TestData\text-to-image-b64_json-format-response.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\text-to-image-response.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
Expand Down
Loading
Loading