Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

.Net: Improve the ITextToImageService abstractions to support ExecutionSettings #8068

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1f567f3
Improving the ITextToImageService to support ExecutionSettings, Image…
RogerBarreto Aug 9, 2024
0af318e
Merge branch 'microsoft:main' into issues/5657-itexttoimage-update
RogerBarreto Aug 12, 2024
201423b
Add quality and style to TextToImageRequest
aghimir3 Aug 14, 2024
6895c05
Implement GetImageContentsAsync in OpenAITextToImageService
aghimir3 Aug 14, 2024
ef12678
Add unit tests for GetImageContentsAsync method
aghimir3 Aug 14, 2024
9a47d7c
Merge pull request #4 from aghimir3/issues/5657-itexttoimage-ag
RogerBarreto Aug 14, 2024
f58e689
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 13, 2024
36d4fb9
Adding support for Text-To-Image Settings
RogerBarreto Sep 13, 2024
b9e40b4
Removed unnecessary parameter
RogerBarreto Sep 13, 2024
23e2f03
Fix Warnings
RogerBarreto Sep 16, 2024
3e0c967
Add missing xmldocs and small changes
RogerBarreto Sep 16, 2024
e7f87eb
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 20, 2024
17c1492
GenerateImage extension method UT working
RogerBarreto Sep 23, 2024
2141016
Updating ITs
RogerBarreto Sep 23, 2024
dc7f8ef
Address extra folder
RogerBarreto Sep 23, 2024
373870e
Fix warnings
RogerBarreto Sep 24, 2024
de0c4f2
Fix warnings
RogerBarreto Sep 24, 2024
a783563
Update Obsolete Concepts
RogerBarreto Sep 24, 2024
34da6a7
Fix warning
RogerBarreto Sep 24, 2024
7f5ec3b
Merge branch 'main' of https://github.com/microsoft/semantic-kernel i…
RogerBarreto Sep 27, 2024
f0c95cd
Adding suppressions where needed
RogerBarreto Sep 27, 2024
1681cd4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 27, 2024
e2177c4
Merge branch 'main' into issues/5657-itexttoimage-update
RogerBarreto Sep 30, 2024
31b6d8c
Address PR comments
RogerBarreto Sep 30, 2024
3ea6fab
Address UT and IT
RogerBarreto Sep 30, 2024
af81e60
Adding extension method usage concepts
RogerBarreto Sep 30, 2024
a459d54
Adding missing MS header
RogerBarreto Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions dotnet/samples/Concepts/TextToImage/OpenAI_TextToImageDalle3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Microsoft.Extensions.Http.Resilience;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.TextToImage;

namespace TextToImage;
Expand Down Expand Up @@ -78,6 +79,24 @@ A cute baby sea otter
*/
}

[Fact]
public async Task SimpleTextToImageExampleAsync()
{
    // Build a kernel that has the Azure OpenAI text-to-image service registered.
    var kernel = Kernel.CreateBuilder()
        .AddAzureOpenAITextToImage( // Add your text to image service
            deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName,
            endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint,
            apiKey: TestConfiguration.AzureOpenAI.ImageApiKey,
            modelId: TestConfiguration.AzureOpenAI.ImageModelId)
        .Build();

    ITextToImageService imageService = kernel.GetRequiredService<ITextToImageService>();

    // The prompt and the requested image dimensions are passed separately.
    var prompt = new TextContent("A cute baby sea otter");
    var settings = new OpenAITextToImageExecutionSettings { Width = 1024, Height = 1024 };

    var images = await imageService.GetImageContentsAsync(prompt, settings);

    // Print the location of the first generated image.
    this.Output.WriteLine(images[0].Uri!.ToString());
}

[Fact(Skip = "Generating the Image can take too long and often break the test")]
public async Task AzureOpenAIDallEAsync()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,4 +209,77 @@ internal void AddAttribute(string key, string? value)
this._attributes.Add(key, value);
}
}

/// <inheritdoc/>
/// <remarks>
/// Only the width/height pairs listed in the size mapping below are accepted; any other
/// combination results in a <see cref="NotSupportedException"/>. Quality and style are
/// forwarded to the request only when they are set on the execution settings.
/// </remarks>
public async Task<IReadOnlyList<ImageContent>> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
{
    // Validate the argument before any other work so that a null input is always reported
    // as such, regardless of what the execution settings contain.
    Verify.NotNull(input);

    var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings);

    // Translate the requested dimensions into one of the supported image sizes.
    var size = (imageSettings.Width, imageSettings.Height) switch
    {
        (256, 256) => ImageSize.Size256x256,
        (512, 512) => ImageSize.Size512x512,
        (1024, 1024) => ImageSize.Size1024x1024,
        (1792, 1024) => ImageSize.Size1792x1024,
        (1024, 1792) => ImageSize.Size1024x1792,
        _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}")
    };

    Response<ImageGenerations> imageGenerations;
    try
    {
        var options = new ImageGenerationOptions
        {
            DeploymentName = this._deploymentName,
            ImageCount = imageSettings.ImageCount,
            Prompt = input.Text,
            Size = size,
        };

        // Quality and style are optional: leave the option untouched when not provided.
        if (imageSettings.Quality is not null)
        {
            options.Quality = imageSettings.Quality;
        }
        if (imageSettings.Style is not null)
        {
            options.Style = imageSettings.Style;
        }

        imageGenerations = await this._client.GetImageGenerationsAsync(options, cancellationToken).ConfigureAwait(false);
    }
    catch (RequestFailedException e)
    {
        // Surface client request failures as the converted HTTP operation exception.
        throw e.ToHttpOperationException();
    }

    if (!imageGenerations.HasValue)
    {
        throw new KernelException("The response does not contain an image result");
    }

    if (imageGenerations.Value.Data.Count == 0)
    {
        throw new KernelException("The response does not contain any image");
    }

    // Each generated image is exposed either by URL or as base64 data wrapped in a data URI.
    List<ImageContent> images = [];
    foreach (var image in imageGenerations.Value.Data)
    {
        if (image.Url is not null)
        {
            images.Add(new ImageContent(image.Url));
        }
        else if (image.Base64Data is not null)
        {
            images.Add(new ImageContent($"data:;base64,{image.Base64Data}"));
        }
        else
        {
            throw new NotSupportedException("Image is neither an URL nor a base64 data");
        }
    }
    return images;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.SemanticKernel.Text;

namespace Microsoft.SemanticKernel.Connectors.OpenAI;

/// <summary>
/// Text to image execution settings for an OpenAI image generation request.
/// </summary>
/// <remarks>
/// Width and height default to 1024. Every setter throws once the settings have been frozen.
/// </remarks>
[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings
{
    private const int DefaultWidth = 1024;
    private const int DefaultHeight = 1024;

    /// <summary>
    /// Initializes a new instance of the <see cref="OpenAITextToImageExecutionSettings"/> class
    /// with the default width and height.
    /// </summary>
    public OpenAITextToImageExecutionSettings()
    {
        this.Width = DefaultWidth;
        this.Height = DefaultHeight;
    }

    /// <summary>
    /// Width of the generated image.
    /// </summary>
    public int Width
    {
        get => this._width;

        set
        {
            this.ThrowIfFrozen();
            this._width = value;
        }
    }

    /// <summary>
    /// The quality of the image that will be generated.
    /// `hd` creates images with finer details and greater consistency across the image.
    /// This param is only supported for dall-e-3.
    /// </summary>
    public string? Quality
    {
        get => this._quality;

        set
        {
            this.ThrowIfFrozen();
            this._quality = value;
        }
    }

    /// <summary>
    /// The number of images to generate. Must be between 1 and 10.
    /// For dall-e-3, only ImageCount = 1 is supported.
    /// </summary>
    public int? ImageCount
    {
        get => this._imageCount;

        set
        {
            this.ThrowIfFrozen();
            this._imageCount = value;
        }
    }

    /// <summary>
    /// The style of the generated images. Must be one of vivid or natural.
    /// Vivid causes the model to lean towards generating hyper-real and dramatic images.
    /// Natural causes the model to produce more natural, less hyper-real looking images.
    /// This param is only supported for dall-e-3.
    /// </summary>
    public string? Style
    {
        get => this._style;

        set
        {
            this.ThrowIfFrozen();
            this._style = value;
        }
    }

    /// <summary>
    /// Height of the generated image.
    /// </summary>
    public int Height
    {
        get => this._height;

        set
        {
            this.ThrowIfFrozen();
            this._height = value;
        }
    }

    /// <inheritdoc/>
    public override void Freeze()
    {
        if (this.IsFrozen)
        {
            return;
        }

        base.Freeze();
    }

    /// <inheritdoc/>
    public override PromptExecutionSettings Clone()
    {
        // Copy every setting declared on this type; omitting one would make the clone
        // silently fall back to that setting's default.
        return new OpenAITextToImageExecutionSettings()
        {
            ModelId = this.ModelId,
            ExtensionData = this.ExtensionData is not null ? new Dictionary<string, object>(this.ExtensionData) : null,
            Width = this.Width,
            Height = this.Height,
            ImageCount = this.ImageCount,
            Quality = this.Quality,
            Style = this.Style,
        };
    }

    /// <summary>
    /// Create a new settings object with the values from another settings object.
    /// </summary>
    /// <param name="executionSettings">
    /// Settings to convert. When <see langword="null"/>, a new instance with default values is returned.
    /// When already an <see cref="OpenAITextToImageExecutionSettings"/>, the same instance is returned.
    /// Otherwise the settings are converted through a JSON serialization round-trip.
    /// </param>
    /// <param name="defaultMaxTokens">Not used by this method; retained so existing callers keep compiling.</param>
    /// <returns>An instance of <see cref="OpenAITextToImageExecutionSettings"/>.</returns>
    public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings, int? defaultMaxTokens = null)
    {
        if (executionSettings is null)
        {
            return new OpenAITextToImageExecutionSettings();
        }

        if (executionSettings is OpenAITextToImageExecutionSettings settings)
        {
            return settings;
        }

        // Any other settings type is mapped by serializing it and reading the JSON back
        // as OpenAI text-to-image settings.
        var json = JsonSerializer.Serialize(executionSettings);

        var openAIExecutionSettings = JsonSerializer.Deserialize<OpenAITextToImageExecutionSettings>(json, JsonOptionsCache.ReadPermissive);
        return openAIExecutionSettings!;
    }

    #region private ================================================================================

    private int _width;
    private int _height;
    private int? _imageCount;
    private string? _quality;
    private string? _style;

    #endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Net.Http;
using System.Text.Json;
using System.Threading;
Expand Down Expand Up @@ -93,6 +94,75 @@ public Task<string> GenerateImageAsync(string description, int width, int height
return this.GenerateImageAsync(this._modelId, description, width, height, "url", x => x.Url, cancellationToken);
}

/// <inheritdoc/>
public async Task<IReadOnlyList<ImageContent>> GetImageContentsAsync(
    TextContent input,
    PromptExecutionSettings? executionSettings = null,
    Kernel? kernel = null,
    CancellationToken cancellationToken = default)
{
    Verify.NotNull(input);

    // Work with the OpenAI-specific view of whatever settings the caller supplied.
    var settings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings);

    // Map the requested dimensions onto the size string sent in the request.
    string dimensions;
    switch ((settings.Width, settings.Height))
    {
        case (256, 256):
            dimensions = "256x256";
            break;
        case (512, 512):
            dimensions = "512x512";
            break;
        case (1024, 1024):
            dimensions = "1024x1024";
            break;
        case (1792, 1024):
            dimensions = "1792x1024";
            break;
        case (1024, 1792):
            dimensions = "1024x1792";
            break;
        default:
            throw new NotSupportedException($"The provided size is not supported: {settings.Width}x{settings.Height}");
    }

    // A null or empty quality/style passes validation; any other value must be a known one.
    if (settings.Quality is not (null or "" or "standard" or "hd"))
    {
        throw new NotSupportedException($"The provided quality '{settings.Quality}' is not supported.");
    }

    if (settings.Style is not (null or "" or "vivid" or "natural"))
    {
        throw new NotSupportedException($"The provided style '{settings.Style}' is not supported.");
    }

    // Unset values fall back to the service model, one image, "standard" quality and "vivid" style.
    var request = new TextToImageRequest
    {
        Model = settings.ModelId ?? this._modelId,
        Prompt = input.Text ?? string.Empty,
        Size = dimensions,
        Count = settings.ImageCount ?? 1,
        Quality = settings.Quality ?? "standard",
        Style = settings.Style ?? "vivid"
    };
    var payload = JsonSerializer.Serialize(request);

    var results = await this._core.ExecuteImageGenerationRequestAsync(OpenAIEndpoint, payload, x => x.Url ?? x.AsBase64, cancellationToken).ConfigureAwait(false);

    // An absolute http(s) URI is kept as a link; anything else is treated as base64 data.
    var contents = new List<ImageContent>();
    foreach (var result in results)
    {
        contents.Add(ToImageContent(result));
    }

    return contents.AsReadOnly();

    static ImageContent ToImageContent(string value)
    {
        bool isWebLink =
            Uri.TryCreate(value, UriKind.Absolute, out var link) &&
            (link.Scheme == Uri.UriSchemeHttp || link.Scheme == Uri.UriSchemeHttps);

        return isWebLink
            ? new ImageContent(link)
            : new ImageContent($"data:;base64,{value}");
    }
}

private async Task<string> GenerateImageAsync(
string? model,
string description,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,16 @@ internal sealed class TextToImageRequest
/// </summary>
[JsonPropertyName("response_format")]
public string Format { get; set; } = "url";

/// <summary>
/// Image quality, "standard" or "hd". Defaults to "standard".
/// Serialized as the "quality" property of the request JSON.
/// </summary>
[JsonPropertyName("quality")]
public string Quality { get; set; } = "standard";

/// <summary>
/// Image style, "vivid" or "natural". Defaults to "vivid".
/// Serialized as the "style" property of the request JSON.
/// </summary>
[JsonPropertyName("style")]
public string Style { get; set; } = "vivid";
}
Loading
Loading