diff --git a/README.md b/README.md index b25ee6a9..c3965797 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,23 @@ pip install -r requirements.txt # prepare your private OpenAI key (for Linux) export OPENAI_API_KEY={Your_Private_Openai_Key} +# if you're using Azure OpenAI service, please add the following settings (for Linux) +export OPENAI_API_TYPE=azure +export OPENAI_API_VERSION=2022-12-01 +export OPENAI_API_BASE=https://{your-resource-name}.openai.azure.com +export OPENAI_API_KEY={Your_Private_Openai_Key} +export OPENAI_API_AZURE_DEPLOYMENT={Your_Azure_Deployment_Name} + # prepare your private OpenAI key (for Windows) set OPENAI_API_KEY={Your_Private_Openai_Key} +# if you're using Azure OpenAI service, please add the following settings (for Windows) +set OPENAI_API_TYPE=azure +set OPENAI_API_VERSION=2022-12-01 +set OPENAI_API_BASE=https://{your-resource-name}.openai.azure.com +set OPENAI_API_KEY={Your_Private_Openai_Key} +set OPENAI_API_AZURE_DEPLOYMENT={Your_Azure_Deployment_Name} + # Start Visual ChatGPT ! # You can specify the GPU/CPU assignment by "--load", the parameter indicates which # Visual Foundation Model to use and where it will be loaded to @@ -95,28 +109,28 @@ python visual_chatgpt.py --load "ImageCaptioning_cuda:0,ImageEditing_cuda:0, Here we list the GPU memory usage of each visual foundation model, you can specify which one you like: | Foundation Model | GPU Memory (MB) | -|------------------------|-----------------| -| ImageEditing | 3981 | -| InstructPix2Pix | 2827 | -| Text2Image | 3385 | -| ImageCaptioning | 1209 | -| Image2Canny | 0 | -| CannyText2Image | 3531 | -| Image2Line | 0 | -| LineText2Image | 3529 | -| Image2Hed | 0 | -| HedText2Image | 3529 | -| Image2Scribble | 0 | -| ScribbleText2Image | 3531 | -| Image2Pose | 0 | -| PoseText2Image | 3529 | -| Image2Seg | 919 | -| SegText2Image | 3529 | -| Image2Depth | 0 | -| DepthText2Image | 3531 | -| Image2Normal | 0 | -| NormalText2Image | 3529 | -| VisualQuestionAnswering| 1495 | +| ----------------------- | --------------- | +| ImageEditing | 3981 | +| InstructPix2Pix | 2827 | +| Text2Image | 3385 | +| ImageCaptioning | 1209 | +| Image2Canny | 0 | +| CannyText2Image | 3531 | +| Image2Line | 0 | +| LineText2Image | 3529 | +| Image2Hed | 0 | +| HedText2Image | 3529 | +| Image2Scribble | 0 | +| ScribbleText2Image | 3531 | +| Image2Pose | 0 | +| PoseText2Image | 3529 | +| Image2Seg | 919 | +| SegText2Image | 3529 | +| Image2Depth | 0 | +| DepthText2Image | 3531 | +| Image2Normal | 0 | +| NormalText2Image | 3529 | +| VisualQuestionAnswering | 1495 | ## Acknowledgement We appreciate the open source of the following projects: diff --git a/visual_chatgpt.py b/visual_chatgpt.py index be7a8a4d..c4edad5f 100644 --- a/visual_chatgpt.py +++ b/visual_chatgpt.py @@ -23,7 +23,7 @@ from langchain.agents.initialize import initialize_agent from langchain.agents.tools import Tool from langchain.chains.conversation.memory import ConversationBufferMemory -from langchain.llms.openai import OpenAI +from langchain.llms.openai import AzureOpenAI,OpenAI VISUAL_CHATGPT_PREFIX = """Visual ChatGPT is designed to be able to assist with a wide range of text and visual related tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. Visual ChatGPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. @@ -224,6 +224,13 @@ def get_new_image_name(org_img_name, func_name="update"): return os.path.join(head, new_file_name) +def get_llm_instance(): + openai_type = os.getenv('OPENAI_API_TYPE',"") + if openai_type == "azure": + deployment = os.getenv('OPENAI_API_AZURE_DEPLOYMENT', "") + return AzureOpenAI(temperature=0, deployment_name=deployment) + return OpenAI(temperature=0) + class MaskFormer: def __init__(self, device): @@ -921,7 +928,7 @@ def inference(self, inputs): class InfinityOutPainting: template_model = True # Add this line to show this is a template model. def __init__(self, ImageCaptioning, ImageEditing, VisualQuestionAnswering): - self.llm = OpenAI(temperature=0) + self.llm = get_llm_instance() self.ImageCaption = ImageCaptioning self.ImageEditing = ImageEditing self.ImageVQA = VisualQuestionAnswering @@ -1042,7 +1049,7 @@ def __init__(self, load_dict): if e.startswith('inference'): func = getattr(instance, e) self.tools.append(Tool(name=func.name, description=func.description, func=func)) - self.llm = OpenAI(temperature=0) + self.llm = get_llm_instance() self.memory = ConversationBufferMemory(memory_key="chat_history", output_key='output') def init_agent(self, lang): self.memory.clear() #clear previous history