microsoft · RogerBarreto · Aug 9, 2024 · Aug 12, 2024 · Aug 14, 2024 · Aug 14, 2024
@@ -4,6 +4,7 @@
 using Microsoft.Extensions.Http.Resilience;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.Connectors.OpenAI;
 using Microsoft.SemanticKernel.TextToImage;
 
 namespace TextToImage;
@@ -78,6 +79,24 @@ A cute baby sea otter
         */
     }
 
+    /// <summary>Requests a 1024x1024 image from the Azure OpenAI text to image service and prints the first result's URI.</summary>
+    [Fact]
+    public async Task SimpleTextToImageExampleAsync()
+    {
+        // Register the text to image service, then resolve it from the built kernel.
+        Kernel kernel = Kernel.CreateBuilder()
+            .AddAzureOpenAITextToImage(
+                deploymentName: TestConfiguration.AzureOpenAI.ImageDeploymentName,
+                endpoint: TestConfiguration.AzureOpenAI.ImageEndpoint,
+                apiKey: TestConfiguration.AzureOpenAI.ImageApiKey,
+                modelId: TestConfiguration.AzureOpenAI.ImageModelId)
+            .Build();
+        ITextToImageService imageService = kernel.GetRequiredService<ITextToImageService>();
+        var settings = new OpenAITextToImageExecutionSettings { Width = 1024, Height = 1024 };
+        var images = await imageService.GetImageContentsAsync(new TextContent("A cute baby sea otter"), settings);
+        this.Output.WriteLine(images[0].Uri!.ToString());
+    }
+
     [Fact(Skip = "Generating the Image can take too long and often break the test")]
     public async Task AzureOpenAIDallEAsync()
     {

diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/AzureOpenAITextToImageService.cs
@@ -209,4 +209,77 @@ internal void AddAttribute(string key, string? value)
             this._attributes.Add(key, value);
         }
     }
+
+    /// <inheritdoc/>
+    /// <remarks>Width and Height must form 256x256, 512x512, 1024x1024, 1792x1024 or 1024x1792; Quality and Style are forwarded only when set.</remarks>
+    public async Task<IReadOnlyList<ImageContent>> GetImageContentsAsync(TextContent input, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
+    {
+        // Resolve the OpenAI-specific settings first, then reject a null input.
+        var settings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings);
+        Verify.NotNull(input);
+
+        // Map the requested pixel dimensions onto the fixed set of sizes exposed by the SDK.
+        var imageSize = (settings.Width, settings.Height) switch
+        {
+            (256, 256) => ImageSize.Size256x256,
+            (512, 512) => ImageSize.Size512x512,
+            (1024, 1024) => ImageSize.Size1024x1024,
+            (1792, 1024) => ImageSize.Size1792x1024,
+            (1024, 1792) => ImageSize.Size1024x1792,
+            _ => throw new NotSupportedException($"The provided size is not supported: {settings.Width}x{settings.Height}")
+        };
+
+        var request = new ImageGenerationOptions
+        {
+            DeploymentName = this._deploymentName,
+            ImageCount = settings.ImageCount,
+            Prompt = input.Text,
+            Size = imageSize,
+        };
+
+        if (settings.Quality is { } quality)
+        {
+            request.Quality = quality;
+        }
+
+        if (settings.Style is { } style)
+        {
+            request.Style = style;
+        }
+
+        Response<ImageGenerations> response;
+        try
+        {
+            response = await this._client.GetImageGenerationsAsync(request, cancellationToken).ConfigureAwait(false);
+        }
+        catch (RequestFailedException e)
+        {
+            throw e.ToHttpOperationException();
+        }
+
+        if (!response.HasValue)
+        {
+            throw new KernelException("The response does not contain an image result");
+        }
+
+        var generated = response.Value.Data;
+        if (generated.Count == 0)
+        {
+            throw new KernelException("The response does not contain any image");
+        }
+
+        var results = new List<ImageContent>();
+        foreach (var item in generated)
+        {
+            if (item.Url is null && item.Base64Data is null)
+            {
+                throw new NotSupportedException("Image is neither an URL nor a base64 data");
+            }
+
+            // Prefer the URL when present; otherwise wrap the base64 payload in a data URI.
+            results.Add(item.Url is not null ? new ImageContent(item.Url) : new ImageContent($"data:;base64,{item.Base64Data}"));
+        }
+
+        return results;
+    }
 }
diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageExecutionSettings.cs
@@ -0,0 +1,159 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Microsoft.SemanticKernel.Text;
+
+namespace Microsoft.SemanticKernel.Connectors.OpenAI;
+
+/// <summary>
+/// Text to image execution settings for an OpenAI image generation request.
+/// </summary>
+[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
+public sealed class OpenAITextToImageExecutionSettings : PromptExecutionSettings
+{
+    private const int DefaultWidth = 1024;
+    private const int DefaultHeight = 1024;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="OpenAITextToImageExecutionSettings"/> class with the default 1024x1024 size.
+    /// </summary>
+    public OpenAITextToImageExecutionSettings()
+    {
+        this.Width = DefaultWidth;
+        this.Height = DefaultHeight;
+    }
+
+    /// <summary>
+    /// Width of the generated image.
+    /// </summary>
+    public int Width
+    {
+        get => this._width;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._width = value;
+        }
+    }
+
+    /// <summary>
+    /// The quality of the image that will be generated.
+    /// `hd` creates images with finer details and greater consistency across the image.
+    /// This param is only supported for dall-e-3.
+    /// </summary>
+    public string? Quality
+    {
+        get => this._quality;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._quality = value;
+        }
+    }
+
+    /// <summary>
+    /// The number of images to generate. Must be between 1 and 10.
+    /// For dall-e-3, only ImageCount = 1 is supported.
+    /// </summary>
+    public int? ImageCount
+    {
+        get => this._imageCount;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._imageCount = value;
+        }
+    }
+
+    /// <summary>
+    /// The style of the generated images. Must be one of vivid or natural.
+    /// Vivid causes the model to lean towards generating hyper-real and dramatic images.
+    /// Natural causes the model to produce more natural, less hyper-real looking images.
+    /// This param is only supported for dall-e-3.
+    /// </summary>
+    public string? Style
+    {
+        get => this._style;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._style = value;
+        }
+    }
+
+    /// <summary>
+    /// Height of the generated image.
+    /// </summary>
+    public int Height
+    {
+        get => this._height;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._height = value;
+        }
+    }
+
+    /// <inheritdoc/>
+    public override void Freeze()
+    {
+        if (this.IsFrozen)
+        {
+            return;
+        }
+
+        base.Freeze();
+    }
+
+    /// <inheritdoc/>
+    public override PromptExecutionSettings Clone()
+    {
+        // Copy every setting declared on this class so the clone issues the same request as the source.
+        return new OpenAITextToImageExecutionSettings()
+        {
+            ModelId = this.ModelId,
+            ExtensionData = this.ExtensionData is not null ? new Dictionary<string, object>(this.ExtensionData) : null,
+            Width = this.Width,
+            Height = this.Height,
+            Quality = this.Quality,
+            ImageCount = this.ImageCount,
+            Style = this.Style,
+        };
+    }
+
+    /// <summary>
+    /// Converts generic prompt execution settings into <see cref="OpenAITextToImageExecutionSettings"/>.
+    /// </summary>
+    /// <param name="executionSettings">Settings to convert; <see langword="null"/> produces a new instance with default values.</param>
+    /// <param name="defaultMaxTokens">Currently unused by this method.</param>
+    /// <returns>
+    /// The given instance itself when it already is an <see cref="OpenAITextToImageExecutionSettings"/>; otherwise a new instance deserialized from the JSON form of the input.
+    /// </returns>
+    public static OpenAITextToImageExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings, int? defaultMaxTokens = null)
+    {
+        if (executionSettings is null)
+        {
+            return new OpenAITextToImageExecutionSettings();
+        }
+
+        if (executionSettings is OpenAITextToImageExecutionSettings settings)
+        {
+            return settings;
+        }
+
+        var json = JsonSerializer.Serialize(executionSettings);
+
+        var openAIExecutionSettings = JsonSerializer.Deserialize<OpenAITextToImageExecutionSettings>(json, JsonOptionsCache.ReadPermissive);
+        return openAIExecutionSettings!;
+    }
+
+    #region private ================================================================================
+    private int _width;
+    private int _height;
+    private int? _imageCount;
+    private string? _quality;
+    private string? _style;
+    #endregion
+}
diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/OpenAITextToImageService.cs
@@ -3,6 +3,7 @@
 using System;
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using System.Net.Http;
 using System.Text.Json;
 using System.Threading;
@@ -93,6 +94,75 @@ public Task<string> GenerateImageAsync(string description, int width, int height
         return this.GenerateImageAsync(this._modelId, description, width, height, "url", x => x.Url, cancellationToken);
     }
 
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<ImageContent>> GetImageContentsAsync(
+        TextContent input,
+        PromptExecutionSettings? executionSettings = null,
+        Kernel? kernel = null,
+        CancellationToken cancellationToken = default)
+    {
+        // Ensure the input is valid
+        Verify.NotNull(input);
+
+        // Convert the generic execution settings to OpenAI-specific settings
+        var imageSettings = OpenAITextToImageExecutionSettings.FromExecutionSettings(executionSettings);
+
+        // Determine the size of the image based on the width and height settings
+        var size = (imageSettings.Width, imageSettings.Height) switch
+        {
+            (256, 256) => "256x256",
+            (512, 512) => "512x512",
+            (1024, 1024) => "1024x1024",
+            (1792, 1024) => "1792x1024",
+            (1024, 1792) => "1024x1792",
+            _ => throw new NotSupportedException($"The provided size is not supported: {imageSettings.Width}x{imageSettings.Height}")
+        };
+
+        // Null or empty means "not specified": apply the defaults first, then validate the values that are actually sent
+        var quality = imageSettings.Quality is { Length: > 0 } explicitQuality ? explicitQuality : "standard";
+        var style = imageSettings.Style is { Length: > 0 } explicitStyle ? explicitStyle : "vivid";
+
+        if (!new[] { "standard", "hd" }.Contains(quality))
+        {
+            throw new NotSupportedException($"The provided quality '{quality}' is not supported.");
+        }
+
+        if (!new[] { "vivid", "natural" }.Contains(style))
+        {
+            throw new NotSupportedException($"The provided style '{style}' is not supported.");
+        }
+
+        // Create the request body for the image generation
+        var requestBody = JsonSerializer.Serialize(new TextToImageRequest
+        {
+            Model = imageSettings.ModelId ?? this._modelId,
+            Prompt = input.Text ?? string.Empty,
+            Size = size,
+            Count = imageSettings.ImageCount ?? 1,
+            Quality = quality,
+            Style = style
+        });
+
+        // Execute the request using the core client; each result is the image URL when present, otherwise its base64 form
+        var imageStrings = await this._core.ExecuteImageGenerationRequestAsync(OpenAIEndpoint, requestBody, x => x.Url ?? x.AsBase64, cancellationToken).ConfigureAwait(false);
+
+        // Convert the strings to ImageContent objects: absolute http(s) values become URIs, anything else a base64 data URI
+        var images = new List<ImageContent>();
+        foreach (var imageString in imageStrings)
+        {
+            if (Uri.TryCreate(imageString, UriKind.Absolute, out var uriResult) && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps))
+            {
+                images.Add(new ImageContent(uriResult));
+            }
+            else
+            {
+                images.Add(new ImageContent($"data:;base64,{imageString}"));
+            }
+        }
+
+        return images.AsReadOnly();
+    }
+
     private async Task<string> GenerateImageAsync(
         string? model,
         string description,

diff --git a/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs b/dotnet/src/Connectors/Connectors.OpenAI/TextToImage/TextToImageRequest.cs
@@ -39,4 +39,16 @@ internal sealed class TextToImageRequest
     /// </summary>
     [JsonPropertyName("response_format")]
     public string Format { get; set; } = "url";
+
+    /// <summary>
+    /// Image quality, serialized as the "quality" request field: "standard" (the default here) or "hd"
+    /// </summary>
+    [JsonPropertyName("quality")]
+    public string Quality { get; set; } = "standard";
+
+    /// <summary>
+    /// Image style, serialized as the "style" request field: "vivid" (the default here) or "natural"
+    /// </summary>
+    [JsonPropertyName("style")]
+    public string Style { get; set; } = "vivid";
 }