-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
vision_image_gen_ui_local.py
253 lines (205 loc) · 11.1 KB
/
vision_image_gen_ui_local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation or a visible attribution in the user interface would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Creator: bigsk1
"""
import os
import glob
import base64
from PIL import Image
import requests
from openai import OpenAI
import streamlit as st
# Sidebar for API key input. An empty field falls back to the OPENAI_API_KEY
# environment variable, and finally to a placeholder string.
api_key = st.sidebar.text_input("Enter your API key", type="password")
if not api_key:
    api_key = os.getenv("OPENAI_API_KEY") or "YOUR_API_KEY"

# Initialize the OpenAI client with the key resolved above, so a key typed in
# the sidebar is actually used (the environment is still the fallback).
client = OpenAI(api_key=api_key)

st.title('AI Image Analyzer and Generator')

# Static help text shown in the sidebar.
st.sidebar.markdown("""
## Instructions
- Provide your OpenAI API key above to authenticate.
- Upload images from a url or locally.
- Once image is downloaded provide detailed instructions that will be added to GPT Vision's prompt of the image.
## About
This tool uses OpenAI's services to analyze images and generate new ones based on your descriptions. Ensure your API key is valid to use these features.
## Usage Tips
- Use clear and descriptive modifications for best results.
- Explore different descriptions to see how they influence the generated images.
created by bigsk1
""")
def save_image(image_bytes, filename, output_dir="original_image"):
    """Write raw image bytes to ``output_dir/filename`` and report the path.

    Args:
        image_bytes: The bytes to write to disk.
        filename: Name of the file to create inside ``output_dir``.
        output_dir: Directory to save into; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, filename)
    with open(file_path, 'wb') as image_file:
        image_file.write(image_bytes)
    st.write(f"Image saved to {file_path}")
def download_and_encode_image(image_input):
    """Load an image from a URL or local path, save a copy, and base64-encode it.

    Args:
        image_input: An ``http://``/``https://`` URL or a local file path.

    Returns:
        The image bytes encoded as a base64 string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the local file cannot be read or the copy cannot be saved.
    """
    if image_input.startswith(('http://', 'https://')):
        from urllib.parse import urlparse

        response = requests.get(image_input, timeout=10)
        # Fail loudly on 4xx/5xx instead of saving an error page as an image.
        response.raise_for_status()
        image_bytes = response.content
        # Take the name from the URL path only, so query strings and fragments
        # do not end up in the filename; fall back when the path ends in '/'.
        filename = os.path.basename(urlparse(image_input).path) or "downloaded_image"
    else:
        with open(image_input, "rb") as image_file:
            image_bytes = image_file.read()
        filename = os.path.basename(image_input)
    save_image(image_bytes, filename)  # Save the image to the local directory
    return base64.b64encode(image_bytes).decode('utf-8')
def get_image_analysis_streamlit(base64_image):
    """Describe an image with GPT-4 Vision, phrased as a text-to-image prompt.

    The completion is streamed; each piece is echoed to stdout as it arrives,
    and the assembled description is printed again under a banner.

    Args:
        base64_image: Base64-encoded image data, embedded in a
            ``data:image/jpeg;base64,...`` URL for the request.

    Returns:
        The description with trailing whitespace removed, or ``None`` if the
        request or the streaming failed.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            stream=True,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "describe this image as a prompt for a text to image model"},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                    ],
                }
            ],
            max_tokens=250,
        )
        pieces = []
        for chunk in response:
            # A streamed chunk may carry no choices; skip it rather than let an
            # IndexError throw away everything received so far.
            if not chunk.choices:
                continue
            text = chunk.choices[0].delta.content
            if text:
                print(text, end="", flush=True)
                pieces.append(str(text))
        description = "".join(pieces).rstrip()
        print("\n" + "="*50 + "\nVision Response:\n" + "="*50)
        print(description)
        return description
    except Exception as e:
        print(f"An error occurred during image analysis: {e}")
        # Surface the failure in the UI as well, matching the DALL-E helper.
        st.error(f"An error occurred during image analysis: {e}")
        return None
def modify_description_streamlit(original_description):
    """Show the description and optionally append a user-requested change.

    Args:
        original_description: The description to display and extend.

    Returns:
        ``original_description`` unchanged when the text input is empty,
        otherwise the description followed by ", modified to include ..."
        and the stripped user input.
    """
    st.write(f"Original Description: {original_description}")
    requested_change = st.text_input(
        "How would you like to modify the image description? (e.g., 'add a hat, change the background, etc.'): "
    ).strip()
    # Nothing entered: hand the description back untouched.
    if not requested_change:
        return original_description
    return f"{original_description}, modified to include {requested_change}"
def generate_image_with_dalle_streamlit(prompt):
    """Generate one 1792x1024 image with DALL-E 3 from ``prompt``.

    The prompt is echoed to stdout under a banner before the request. On
    success the base64 image is also stored in ``st.session_state['b64_image']``.

    Args:
        prompt: The text prompt sent to the image model.

    Returns:
        A ``(b64_data, revised_prompt)`` tuple on success, or ``(None, None)``
        on any failure, so callers can always unpack two values.
    """
    try:
        print("\n" + "="*50 + "\nFinal Prompt Sent to DALL-E 3:\n" + "="*50)
        print(prompt)
        response = client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1792x1024",
            quality="standard",
            response_format="b64_json",
            n=1,
        )
        if response:
            b64_data = response.data[0].b64_json
            revised_prompt = response.data[0].revised_prompt
            st.session_state['b64_image'] = b64_data  # Store in session for display
            return b64_data, revised_prompt
    except Exception as e:
        st.error(f"Error generating image: {e}")
    # Reached after an exception or a falsy response: keep the return shape
    # a 2-tuple so tuple-unpacking callers never get a bare None.
    return None, None
def save_base64_image_streamlit(b64_data, original_name, output_dir="generated_images"):
    """Decode a base64 image and save it under the next free numbered name.

    Files are named ``<base>_generated_<NN>.png``, where ``<base>`` is the
    basename of ``original_name`` without its extension and ``<NN>`` is one
    more than the highest number already present in ``output_dir``.

    Args:
        b64_data: Base64-encoded image data.
        original_name: Path or URL of the source image; only its basename
            (minus extension) is used.
        output_dir: Directory to save into; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    base_filename = os.path.splitext(os.path.basename(original_name))[0]

    # Escape the literal parts so glob metacharacters ('[', '*', '?') in the
    # directory or source name cannot break the search for existing files.
    pattern = os.path.join(
        glob.escape(output_dir),
        f"{glob.escape(base_filename)}_generated_*.png",
    )
    highest_num = 0
    for existing_path in glob.glob(pattern):
        try:
            num = int(existing_path.rsplit('_', 1)[-1].split('.')[0])
        except ValueError:
            # Suffix is not a number (e.g. a hand-renamed file); ignore it.
            continue
        highest_num = max(highest_num, num)

    new_num = highest_num + 1
    new_filename = f"{base_filename}_generated_{new_num:02d}.png"
    file_path = os.path.join(output_dir, new_filename)

    # Decode before opening so invalid data does not leave an empty file behind.
    img_data = base64.b64decode(b64_data)
    with open(file_path, 'wb') as f:
        f.write(img_data)
    st.success(f"Generated image saved as {new_filename}")
def show_image_gallery(directory="generated_images"):
    """Render every file in ``directory`` with an "X" button that deletes it.

    Does nothing when ``directory`` is not an existing directory. Entries are
    shown in sorted name order; sub-directories are skipped.

    Args:
        directory: Folder whose files are displayed.
    """
    if not os.path.isdir(directory):
        return
    # os.listdir order is arbitrary; sort so the gallery is stable across reruns.
    for image in sorted(os.listdir(directory)):
        image_path = os.path.join(directory, image)
        # Only regular files can be displayed or removed with os.remove.
        if not os.path.isfile(image_path):
            continue
        # Use a combination of directory and image name to ensure the key is unique
        unique_key = f"{directory}_{image}"
        col1, col2 = st.columns([20, 1])  # Adjust the ratio as needed
        with col1:  # Image display column
            st.image(image_path, caption=image, use_column_width=True)
        with col2:  # Deletion button column
            if st.button("X", key=unique_key):
                os.remove(image_path)  # Delete the image file
                st.rerun()  # Redraw the gallery without the deleted file
def clear_inputs():
    """Drop this app's session-state entries and rerun the script."""
    resettable = (
        'image_input',
        'modification',
        'last_image_input',
        'original_description',
        'process_image',
    )
    # Collect the keys that are actually present first, then delete them.
    present = [name for name in resettable if name in st.session_state]
    for name in present:
        del st.session_state[name]
    st.rerun()  # Force a rerun of the app to reset state
if __name__ == "__main__":
    # Seed the session-state keys read below so later lookups never fail.
    if 'image_input' not in st.session_state:
        st.session_state['image_input'] = ""
    if 'modification' not in st.session_state:
        st.session_state['modification'] = ""

    # Input for image URL or path with session state management
    st.session_state['image_input'] = st.text_input(
        "Enter the path to a local image or an image URL:",
        value=st.session_state['image_input'],
    )

    # Process the image input if it's provided
    if st.session_state['image_input']:
        # Only download and analyze when the input changed since the last run;
        # Streamlit reruns this script on every widget interaction.
        if 'last_image_input' not in st.session_state or st.session_state['last_image_input'] != st.session_state['image_input']:
            base64_image = download_and_encode_image(st.session_state['image_input'])
            original_description = get_image_analysis_streamlit(base64_image)
            st.session_state['original_description'] = original_description
            st.session_state['last_image_input'] = st.session_state['image_input']
            st.session_state['modification'] = ""  # Reset modification input for new image

        # The analysis helper returns None on failure; treat that (and an empty
        # string) as "no description" instead of showing the literal "None".
        description = st.session_state.get('original_description')
        if description:
            st.write(f"Description from GPT Vision: {description}")
        else:
            st.warning("No description could be generated for this image. Check the image path/URL and your API key, then try again.")

        # Modification text input
        st.session_state['modification'] = st.text_input(
            "Enter your description to add for Dalle here:",
            value=st.session_state['modification'],
        )
        modification = st.session_state['modification']

        if modification:
            # This ensures we only print when there's an actual modification provided.
            print("\n" + "="*50 + "\nUser's Modification Input:\n" + "="*50)
            print(modification)

        # Generate modified image on button click
        if st.button("Generate Modified Image"):
            if not description:
                # Without a description the prompt would start with "None".
                st.error("Unable to generate modified image due to an error.")
            else:
                if modification:
                    modified_description = f"{description} {modification}"
                else:
                    modified_description = description
                b64_data, revised_prompt = generate_image_with_dalle_streamlit(modified_description)
                if b64_data:
                    img_data = base64.b64decode(b64_data)
                    st.image(img_data, caption="Generated Image", use_column_width=True)
                    save_base64_image_streamlit(b64_data, st.session_state['image_input'])
                else:
                    st.error("Unable to generate modified image due to an error.")

    # Clear button to reset inputs and state
    if st.button("Clear & Start Over"):
        clear_inputs()

    st.title("Image Gallery")

    # Expanders for viewing images in directories
    with st.expander("View Original Images"):
        show_image_gallery(directory="original_image")

    # Display the image gallery with deletion options
    with st.expander("View Generated Images"):
        show_image_gallery(directory="generated_images")

    # Help text; the size matches the "1792x1024" requested from DALL-E 3.
    st.markdown("""
### How it Works:
- The app first downloads the image from the provided URL or path locally and analyzes it using the pre-trained AI model gpt-4-vision-preview to generate a description.
- You're then given the opportunity to modify this description to guide the image generation process, the original description from the vision model and your included description are used.
- Finally, the app uses DALL-E 3 to generate a new image 1792x1024 based on the modified description.
- You can see the original image and then newly created image. Right click to save.
""")