From 113de21586545279fc4fe71474bca032f16572ac Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Tue, 12 Sep 2023 01:57:28 +0530 Subject: [PATCH 01/15] Added tool use prompt --- powerpoint_generative_ai/domain/prompts.py | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 02d9893..46fe24d 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -1,3 +1,64 @@ +TOOL_USE_PROMPT = """ +YOU MUST ALWAYS OUTPUT IN THE GIVEN FORMAT. EVEN IF OTHER OUTPUTS ARE DIFFERENT. +You are an analyst and a masterful tool user. Your job right now is to determine whether to call a tool or not call a tool. + +You must analyze the given context. + +You can use the following tools: + +- generate_chart(query): Identify if the user has passed data for a chart (different from a diagram). If user wants a specific chart, give him that. If not, give him the best chart for the data. The query should include chart type too. + +Only return the value of the most applicable chart type: + +{ + "BAR_CLUSTERED": {"value": 57, "description": "Clustered Bar."}, + "BAR_OF_PIE": {"value": 71, "description": "Bar of Pie."}, + "BAR_STACKED": {"value": 58, "description": "Stacked Bar."}, + "BAR_STACKED_100": {"value": 59, "description": "100% Stacked Bar."}, + "COLUMN_CLUSTERED": {"value": 51, "description": "Clustered Column."}, + "COLUMN_STACKED": {"value": 52, "description": "Stacked Column."}, + "COLUMN_STACKED_100": {"value": 53, "description": "100% Stacked Column."}, + "LINE": {"value": 4, "description": "Line."}, + "LINE_MARKERS": {"value": 65, "description": "Line with Markers."}, + "LINE_MARKERS_STACKED": {"value": 66, "description": "Stacked Line with Markers."}, + "LINE_MARKERS_STACKED_100": {"value": 67, "description": "100% Stacked Line with Markers."}, + "LINE_STACKED": {"value": 63, "description": "Stacked Line."}, + "LINE_STACKED_100": {"value": 64, "description": "100% Stacked Line."} +} + +- generate_mermaid_chart(query): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a graph, give him one using this. The query should include chart type too. + +==== + +All your outputs have to be in this format: + + +Think before your actual output, think: +- What is the user asking? +- What is the text about? +- Should we use a tool here? +- How should we use the tool here? +- Dowe need a chart or a diagram? +- What should be the output? +- Analyze the data. + +Plan ahead here. + + + +IF you do not want to call a function, output- call:none:none + +IF you want to call a function, output in this format: +call:func_name:param +example - call:generate_chart:{"value": 51, "description": "Clustered Column."} + + + +YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. +Do not hallucinate. +""" + + DECK_CREATION_SYSTEM_PROMPT = """Take the user input and create content for a slideshow related to the user's input. You will generate titles for the slides, content that tells a cohesive story throughout the slides. DO NOT title each slide like 'Slide X: ...'. Data may be provided in the input, if it has been provided determine the best slide to include a chart. ONLY INSERT CHARTS when data is provided. From 9aa7752cb9299f422be5394aedb025eb45214b2c Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Wed, 13 Sep 2023 17:50:37 +0530 Subject: [PATCH 02/15] Test commit --- powerpoint_generative_ai/domain/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 46fe24d..a653c1f 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -55,7 +55,7 @@ YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. -Do not hallucinate. +Do not hallucinate. """ From a8007bdcc6e4c088dd56e88a3f07c0ecd5da9dd6 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Wed, 13 Sep 2023 17:54:21 +0530 Subject: [PATCH 03/15] Test commit --- powerpoint_generative_ai/domain/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index a653c1f..46fe24d 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -55,7 +55,7 @@ YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. -Do not hallucinate. +Do not hallucinate. """ From 69b79ecd975bd5d4b78fa547fead2bad9d30be38 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Wed, 13 Sep 2023 17:54:48 +0530 Subject: [PATCH 04/15] Test commit --- powerpoint_generative_ai/domain/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 46fe24d..a653c1f 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -55,7 +55,7 @@ YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. -Do not hallucinate. +Do not hallucinate. """ From 01de2b721affd88cbdaa090e6cb3f3f9959a8075 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Thu, 14 Sep 2023 02:29:03 +0530 Subject: [PATCH 05/15] Integrated tool use prompt --- powerpoint_generative_ai/domain/prompts.py | 15 ++++++++--- powerpoint_generative_ai/ppt_generator.py | 27 +++++++++----------- powerpoint_generative_ai/utils/utils.py | 19 +++++++++++++- tests.py | 29 ++++++++++++++++++++++ 4 files changed, 70 insertions(+), 20 deletions(-) create mode 100644 tests.py diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index a653c1f..c4f3f30 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -26,7 +26,9 @@ "LINE_STACKED_100": {"value": 64, "description": "100% Stacked Line."} } -- generate_mermaid_chart(query): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a graph, give him one using this. The query should include chart type too. +Again, only return the value of the most applicable chart type. So for line chart you would use 4, etc. + +- generate_mermaid_chart(query): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a diagram, give him one using this. ==== @@ -44,18 +46,23 @@ Plan ahead here. +DO NOT USE MORE THAN 25 WORDS TO THINK. + IF you do not want to call a function, output- call:none:none IF you want to call a function, output in this format: call:func_name:param -example - call:generate_chart:{"value": 51, "description": "Clustered Column."} +example - call|generate_chart|51 + +If you do not want to call a function, output- call|none|none -YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. -Do not hallucinate. +===== +YOU CAN ONLY DO ONE THING, either generate an output, or call a function. But always output in this given format. The output will be used in our program, so it has to be in this format. Otherwise the code will break. +Do not hallucinate. """ diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 5bb7fde..38d1408 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -4,13 +4,12 @@ from .domain.exceptions import InvalidModel from .domain.prompts import ( DECK_CREATION_SYSTEM_PROMPT, - CHART_DATA_IDENTIFICATION, - BEST_CHART_FOR_DATA_SYSTEM_PROMPT, TITLE_GEN_SYSTEM_PROMPT, - FILENAME_SYSTEM_PROMPT + FILENAME_SYSTEM_PROMPT, + TOOL_USE_PROMPT ) from .ppt.ppt_creator import PowerPointCreator -from .utils.utils import format_simple_message_for_gpt, call_gpt_with_backoff +from .utils.utils import format_simple_message_for_gpt, call_gpt_with_backoff, parse_function_call_output class PowerPointGenerator: def __init__(self, openai_key: str, model: str = "gpt-4"): @@ -24,20 +23,18 @@ def __init__(self, openai_key: str, model: str = "gpt-4"): def create_powerpoint(self, user_input: str) -> str: """Generates a powerpoint based on the user's input""" - # identify if the user passed in data for a chart data_messages = format_simple_message_for_gpt( - system_message=CHART_DATA_IDENTIFICATION, user_message=user_input) + system_message=TOOL_USE_PROMPT, + user_message=f"This is the user input: \n{user_input}\n Analyze this and output in the given format.") data_response = call_gpt_with_backoff( messages=data_messages, temperature=0, max_length=MAX_CONTENT_LENGTH) - - # data was found in the input, determine which chart type fits the data best - if data_response.lower() == "data found": - best_chart_messages = format_simple_message_for_gpt( - system_message=BEST_CHART_FOR_DATA_SYSTEM_PROMPT, user_message=user_input) - best_chart_response = call_gpt_with_backoff( - messages=best_chart_messages, temperature=0) - # append this chart type instruction to the user input for deck creation - user_input += f"\nUse chart type: {best_chart_response}" + + + func, param = parse_function_call_output(data_response) + if func != "none": + if func == "generate_chart": + best_chart_response = param + user_input += f"\nUse chart type: {best_chart_response}" # create the deck based on the user input and load its json deck_messages = format_simple_message_for_gpt( diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index 98117d3..e2325f8 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -1,3 +1,4 @@ +import re import backoff import logging import openai @@ -70,4 +71,20 @@ def call_gpt(messages: List, model: str = "gpt-4", temperature: float = 0.7, max frequency_penalty=0.0, top_p=1 ) - return response['choices'][0]['message']['content'] \ No newline at end of file + return response['choices'][0]['message']['content'] + + +def parse_function_call_output(input_text: str) -> list[str]: + """ + Removes the tags from the input_text, clean it and return the function name and params. + """ + + pattern = ".*?" + cleaned_text = re.sub(pattern, '', input_text, flags=re.DOTALL) + text = cleaned_text.replace("", "").replace("", "").strip() + + + function_name = text.split("|")[1] + param = text.split("|")[2] + + return [function_name, param] \ No newline at end of file diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..79405c7 --- /dev/null +++ b/tests.py @@ -0,0 +1,29 @@ +from dotenv import load_dotenv +import os +load_dotenv() + + +from powerpoint_generative_ai.ppt_generator import PowerPointGenerator + +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") + +USER_TEXTS = [ +"""create a six slide powerpoint about the growing obesity rate and its effect on health insurance premiums. here is some data for a chart: +x axis: 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024 +US: 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40% +UK: 3%, 6%, 9%, 12%, 15%, 18%, 21%, 24% +RU: 2%, 4%, 6%, 8%, 10%, 12%, 14%, 16% +FR: 7%, 14%, 21%, 28%, 35%, 42%, 49%, 56% +IT: 1%, 2%, 3%, 4%, 5%, 6%, 7%, 8%""", +] + + + +def generate_ppt(): + ppt_generator = PowerPointGenerator(OPENAI_API_KEY) + powerpoint_files = [ppt_generator.create_powerpoint(user_input=user_text) for user_text in USER_TEXTS] + + + +if __name__ == "__main__": + generate_ppt() \ No newline at end of file From 0ef3fb3aa812ba8a99a390c2a2a6a5496b3306d0 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Thu, 14 Sep 2023 04:31:43 +0530 Subject: [PATCH 06/15] Added diagram generation tool --- powerpoint_generative_ai/domain/prompts.py | 25 +++++++++++++++------ powerpoint_generative_ai/ppt/ppt_creator.py | 9 +++++++- powerpoint_generative_ai/ppt_generator.py | 6 ++++- powerpoint_generative_ai/utils/utils.py | 19 +++++++++++++++- tests.py | 12 +++++----- 5 files changed, 54 insertions(+), 17 deletions(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index c4f3f30..b8a759c 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -6,7 +6,7 @@ You can use the following tools: -- generate_chart(query): Identify if the user has passed data for a chart (different from a diagram). If user wants a specific chart, give him that. If not, give him the best chart for the data. The query should include chart type too. +- generate_chart(param): Identify if the user has passed data for a chart (different from a diagram). If user wants a specific chart, give him that. If not, give him the best chart for the data. The param should include chart type too. Only return the value of the most applicable chart type: @@ -28,7 +28,16 @@ Again, only return the value of the most applicable chart type. So for line chart you would use 4, etc. -- generate_mermaid_chart(query): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a diagram, give him one using this. +- generate_mermaid_diagram(param): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a diagram, give him one using this. + +Pass the mermaid syntax text AND descriptive name of the diagram in the param. Analyze what the user wants, then convert it into a proper diagram. Then pass the diagram to the function. +param will look like: "graph TD; A-->B; A-->C; B-->D; C-->D;" @,@ "Diagram name" + +User does not need to pass any data for diagrams, make the diagram on your own. Unless user has passed some data for a diagram, then use that data to make the diagram. + +Yes, use @,@ to separate the mermaid syntax text and the name of the diagram. + + ==== @@ -37,24 +46,25 @@ Think before your actual output, think: - What is the user asking? -- What is the text about? - Should we use a tool here? - How should we use the tool here? - Dowe need a chart or a diagram? -- What should be the output? - Analyze the data. Plan ahead here. -DO NOT USE MORE THAN 25 WORDS TO THINK. +DO NOT USE MORE THAN 5-7 sentences TO THINK. IF you do not want to call a function, output- call:none:none IF you want to call a function, output in this format: -call:func_name:param +call|func_name|param example - call|generate_chart|51 +example - call|generate_mermaid_diagram|graph TD; A-->B; A-->C; B-->D; C-->D;@,@Diagram name + +Remember that these are just examples. Make your own diagrams and charts. If you do not want to call a function, output- call|none|none @@ -100,7 +110,8 @@ }, { 'title': 'Slide 3', - 'content': 'This is some content for slide 3' + 'content': 'This is some content for slide 3', + 'diagram_name': 'Diagram name' } ] diff --git a/powerpoint_generative_ai/ppt/ppt_creator.py b/powerpoint_generative_ai/ppt/ppt_creator.py index 52008d4..2ab99c7 100644 --- a/powerpoint_generative_ai/ppt/ppt_creator.py +++ b/powerpoint_generative_ai/ppt/ppt_creator.py @@ -46,8 +46,9 @@ def add_slide(self, content: dict): """Helper function to add slides to powerpoint""" text_content = content.get('content', None) chart_data = content.get('chart_data', None) + image_path = content.get('diagram_name', None) LAYOUT = SLIDE_LAYOUTS['Title Slide'] - if chart_data and text_content: + if text_content and (chart_data or image_path): LAYOUT = SLIDE_LAYOUTS['Two Content'] # Text column and blank right side elif chart_data and not text_content: LAYOUT = SLIDE_LAYOUTS['Title Only'] # Just a title for a big chart @@ -66,6 +67,9 @@ def add_slide(self, content: dict): if chart_data: self.add_chart(data=chart_data, slide=slide, chart_type=content.get('chart_type', XL_CHART_TYPE.COLUMN_CLUSTERED)) + if image_path: + self.add_image(image_path=image_path, slide=slide) + def add_chart(self, data: dict, slide: Slide, x: Inches = Inches(4.75), y: Inches = Inches(2), cx: Inches=Inches(5.5), cy: Inches = Inches(4.5), chart_type: int = XL_CHART_TYPE.COLUMN_CLUSTERED): """Creates a chart and adds it to the current slide""" @@ -79,6 +83,9 @@ def add_chart(self, data: dict, slide: Slide, x: Inches = Inches(4.75), y: Inche for series in chart.series: series.has_data_labels = True + def add_image(self, image_path: str, slide: Slide, x: Inches = Inches(4.75), y: Inches = Inches(2), cx: Inches=Inches(5.5), cy: Inches = Inches(4.5)): + """Adds an image to the current slide""" + slide.shapes.add_picture(image_path, x, y, cx, cy) def save(self, file_name: str): self.presentation.save(file_name) diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 38d1408..10d9b36 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -9,7 +9,7 @@ TOOL_USE_PROMPT ) from .ppt.ppt_creator import PowerPointCreator -from .utils.utils import format_simple_message_for_gpt, call_gpt_with_backoff, parse_function_call_output +from .utils.utils import format_simple_message_for_gpt, call_gpt_with_backoff, generate_mermaid_diagram, parse_function_call_output class PowerPointGenerator: def __init__(self, openai_key: str, model: str = "gpt-4"): @@ -35,6 +35,10 @@ def create_powerpoint(self, user_input: str) -> str: if func == "generate_chart": best_chart_response = param user_input += f"\nUse chart type: {best_chart_response}" + elif func == "generate_mermaid_diagram": + mermaid_text, name = param.split("@,@") + generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + user_input += f"\We have a diagram named: '{name}.png'. \n Use it in the powerpoint." # create the deck based on the user input and load its json deck_messages = format_simple_message_for_gpt( diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index e2325f8..a3dd03f 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -3,6 +3,8 @@ import logging import openai from typing import List +import requests +import base64, zlib def setup_logger(name) -> logging.Logger: """ @@ -87,4 +89,19 @@ def parse_function_call_output(input_text: str) -> list[str]: function_name = text.split("|")[1] param = text.split("|")[2] - return [function_name, param] \ No newline at end of file + return [function_name, param] + + +def generate_mermaid_diagram(mermaid_text: str, filename: str = "diagram.png"): + """ + Takes in mermaid syntax text and generates a diagram + """ + + encoded_mermaid_text = base64.urlsafe_b64encode(zlib.compress(mermaid_text.encode("utf-8"), 9)).decode("ascii") + url = f"https://kroki.io/mermaid/png/{encoded_mermaid_text}" + response = requests.get(url) + + with open(filename, "wb") as f: + f.write(response.content) + + return filename diff --git a/tests.py b/tests.py index 79405c7..49b2c5d 100644 --- a/tests.py +++ b/tests.py @@ -8,13 +8,11 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") USER_TEXTS = [ -"""create a six slide powerpoint about the growing obesity rate and its effect on health insurance premiums. here is some data for a chart: -x axis: 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024 -US: 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40% -UK: 3%, 6%, 9%, 12%, 15%, 18%, 21%, 24% -RU: 2%, 4%, 6%, 8%, 10%, 12%, 14%, 16% -FR: 7%, 14%, 21%, 28%, 35%, 42%, 49%, 56% -IT: 1%, 2%, 3%, 4%, 5%, 6%, 7%, 8%""", +"""create a six slide powerpoint about the growing obesity rate and its effect on health insurance premiums. + + +Also add a diagram about biology of fat cells. +""", ] From 4464b523144b46d3fd44a3673d58d85be3dba78e Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 15 Sep 2023 00:56:25 +0530 Subject: [PATCH 07/15] Added multiple tool use capability --- powerpoint_generative_ai/domain/prompts.py | 14 ++++++++-- powerpoint_generative_ai/ppt/ppt_creator.py | 2 +- powerpoint_generative_ai/ppt_generator.py | 31 +++++++++++++++------ powerpoint_generative_ai/utils/utils.py | 10 +++++-- tests.py => test.py | 10 ++++++- 5 files changed, 50 insertions(+), 17 deletions(-) rename tests.py => test.py (59%) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index b8a759c..5b7f5b5 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -31,10 +31,12 @@ - generate_mermaid_diagram(param): in here you can pass mermaid syntax text to generate a diagram (different from a chart). If user wants a diagram, give him one using this. Pass the mermaid syntax text AND descriptive name of the diagram in the param. Analyze what the user wants, then convert it into a proper diagram. Then pass the diagram to the function. -param will look like: "graph TD; A-->B; A-->C; B-->D; C-->D;" @,@ "Diagram name" +param will look like: "" @,@ "Diagram name" User does not need to pass any data for diagrams, make the diagram on your own. Unless user has passed some data for a diagram, then use that data to make the diagram. +You can make sophisticated diagrams and simple ones too. Try to explain the topics properly. + Yes, use @,@ to separate the mermaid syntax text and the name of the diagram. @@ -64,7 +66,9 @@ example - call|generate_chart|51 example - call|generate_mermaid_diagram|graph TD; A-->B; A-->C; B-->D; C-->D;@,@Diagram name -Remember that these are just examples. Make your own diagrams and charts. +Remember that these are just examples. Make your own diagrams and charts. Do not be limited to these. + +Understand that you can call multiple functions at the same time. Every function call must be in a seperate line. If you do not want to call a function, output- call|none|none @@ -110,10 +114,14 @@ }, { 'title': 'Slide 3', - 'content': 'This is some content for slide 3', + 'content': 'This is some detailed content for slide 3 which goes well with the diagram.', 'diagram_name': 'Diagram name' } ] +====== + +Note that the text you generate should be detailed and user should always learn something new. But do not write too much, short sentences with good information. + Note: Your output must be parsable, valid JSON. DO NOT summarize what each slide was about, the content on each slide should be meaningful information""" diff --git a/powerpoint_generative_ai/ppt/ppt_creator.py b/powerpoint_generative_ai/ppt/ppt_creator.py index 2ab99c7..24b5e1e 100644 --- a/powerpoint_generative_ai/ppt/ppt_creator.py +++ b/powerpoint_generative_ai/ppt/ppt_creator.py @@ -83,7 +83,7 @@ def add_chart(self, data: dict, slide: Slide, x: Inches = Inches(4.75), y: Inche for series in chart.series: series.has_data_labels = True - def add_image(self, image_path: str, slide: Slide, x: Inches = Inches(4.75), y: Inches = Inches(2), cx: Inches=Inches(5.5), cy: Inches = Inches(4.5)): + def add_image(self, image_path: str, slide: Slide, x: Inches = Inches(4.75), y: Inches = Inches(2), cx: Inches=Inches(4), cy: Inches = Inches(3.5)): """Adds an image to the current slide""" slide.shapes.add_picture(image_path, x, y, cx, cy) diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 10d9b36..5231010 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -30,15 +30,28 @@ def create_powerpoint(self, user_input: str) -> str: messages=data_messages, temperature=0, max_length=MAX_CONTENT_LENGTH) - func, param = parse_function_call_output(data_response) - if func != "none": - if func == "generate_chart": - best_chart_response = param - user_input += f"\nUse chart type: {best_chart_response}" - elif func == "generate_mermaid_diagram": - mermaid_text, name = param.split("@,@") - generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') - user_input += f"\We have a diagram named: '{name}.png'. \n Use it in the powerpoint." + calls = parse_function_call_output(data_response) + + diagrams = [] + for func, param in calls: + if func != "none": + if func == "generate_chart": + best_chart_response = param + user_input += f"\nUse chart type: {best_chart_response}" + elif func == "generate_mermaid_diagram": + mermaid_text, name = param.split("@,@") + generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + diagrams.append(name) + + if diagrams != []: + diagrams = "\n".join([diagram+'.png' for diagram in diagrams]) + + user_input += f""" + We have some diagrams named: + + {diagrams} + + You can use them in your powerpoint.""" # create the deck based on the user input and load its json deck_messages = format_simple_message_for_gpt( diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index a3dd03f..77b888d 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -85,11 +85,15 @@ def parse_function_call_output(input_text: str) -> list[str]: cleaned_text = re.sub(pattern, '', input_text, flags=re.DOTALL) text = cleaned_text.replace("", "").replace("", "").strip() + functions_calls = text.split("\n") + parsed_functions_calls = [] + for function_call in functions_calls: + function_name = function_call.split("|")[1] + param = function_call.split("|")[2] - function_name = text.split("|")[1] - param = text.split("|")[2] + parsed_functions_calls.append([function_name, param]) - return [function_name, param] + return parsed_functions_calls def generate_mermaid_diagram(mermaid_text: str, filename: str = "diagram.png"): diff --git a/tests.py b/test.py similarity index 59% rename from tests.py rename to test.py index 49b2c5d..96546e1 100644 --- a/tests.py +++ b/test.py @@ -10,8 +10,16 @@ USER_TEXTS = [ """create a six slide powerpoint about the growing obesity rate and its effect on health insurance premiums. +here is some data for a chart: +x axis: 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024 +US: 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40% +UK: 3%, 6%, 9%, 12%, 15%, 18%, 21%, 24% +RU: 2%, 4%, 6%, 8%, 10%, 12%, 14%, 16% +FR: 7%, 14%, 21%, 28%, 35%, 42%, 49%, 56% +IT: 1%, 2%, 3%, 4%, 5%, 6%, 7%, 8% -Also add a diagram about biology of fat cells. + +Also add a diagram about biology of fat cells. And a diagram about how sugar works. """, ] From f95c4942e5666a3f1dcdf5fbda8ffbbef3decae3 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 22 Sep 2023 17:17:50 +0530 Subject: [PATCH 08/15] Updated prompts for single slide creation --- powerpoint_generative_ai/domain/prompts.py | 54 +++++++++++++++++++++- powerpoint_generative_ai/ppt_generator.py | 9 +++- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 5b7f5b5..f7e97b6 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -50,7 +50,8 @@ - What is the user asking? - Should we use a tool here? - How should we use the tool here? -- Dowe need a chart or a diagram? +- Do we need a chart or a diagram? +---- How can we generate a creative diagram? Make sure to not just copy paste the format of the example. - Analyze the data. Plan ahead here. @@ -149,4 +150,53 @@ TITLE_GEN_SYSTEM_PROMPT = """Generate a title for this powerpoint based on the content""" -FILENAME_SYSTEM_PROMPT = """Take the powerpoint title in the user text and create a short version to be used as a filename for a .pptx file""" \ No newline at end of file +FILENAME_SYSTEM_PROMPT = """Take the powerpoint title in the user text and create a short version to be used as a filename for a .pptx file""" + + +def SLIDE_CREATION_PROMPT(description): + PROMPT = f""" +Take the user input and create content for a slide in a slide show. You are given description of a single slide. You will generate a title for the slide, content that tells a cohesive story throughout the slide. DO NOT title the slide like 'Slide X: ...'. +Make sure the content you write is informative and extensive. But it doesn't need to be too long. Short sentences with good information is the key. + +You have to always output in a very specific format, here are some examples: + +example 1: +{ + 'title': 'Slide 1', + 'content': 'This is some content for slide 1' +} + +example 2: + +{ + 'title': 'Slide 1', + 'content': 'This slide has a multi-line chart', + 'chart_data': { + 'categories': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul'], + 'series': [ + {'name': 'Series 1', 'values': [1, 2, 3, 4, 5, 6, 7]}, + {'name': 'Series 2', 'values': [2, 3, 4, 5, 6, 7, 8]}, + {'name': 'Series 3', 'values': [3, 4, 5, 6, 7, 8, 9]} + ] + }, + 'chart_type': 4 +} + +example 3: + + { + 'title': 'Slide 1', + 'content': 'This is some detailed content for slide 1 which goes well with the diagram.', + 'diagram_name': 'Diagram name' + } + + + +You will be provided diagram_name and chart_data when necessary. + +Note that you are an expert, you must write excellent content. You must always output in the given format. Even if other outputs are different. +Be engaging and always provide new information. Do not summarize what the slide is about, the content on each slide should be meaningful information. + +But write short sentences with good information. Do not write too much. + +""" \ No newline at end of file diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 5231010..cdee0cb 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -1,4 +1,5 @@ import json +from typing import Union import openai from .domain.constants import MAX_CONTENT_LENGTH from .domain.exceptions import InvalidModel @@ -20,8 +21,8 @@ def __init__(self, openai_key: str, model: str = "gpt-4"): "name is correct or you have access to the model requested" ) self.model = model - - def create_powerpoint(self, user_input: str) -> str: + # str or list user_input + def create_powerpoint(self, user_input: Union[str, list]) -> str: """Generates a powerpoint based on the user's input""" data_messages = format_simple_message_for_gpt( system_message=TOOL_USE_PROMPT, @@ -77,3 +78,7 @@ def create_powerpoint(self, user_input: str) -> str: ppt = PowerPointCreator(title=title_response, slides_content=deck_json) ppt.create(file_name=filename_response) return filename_response + + + def _create_powerpoint_from_outline(outline: list) -> str: + pass \ No newline at end of file From 077e2c61512216a8b13fc7d90191aa0d47f78fe1 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 22 Sep 2023 17:53:49 +0530 Subject: [PATCH 09/15] Changed the delimiter to `<|?|>` --- powerpoint_generative_ai/domain/prompts.py | 12 ++-- powerpoint_generative_ai/ppt_generator.py | 70 +++++++++++++++++++++- powerpoint_generative_ai/utils/utils.py | 4 +- test.py | 26 +++++++- 4 files changed, 98 insertions(+), 14 deletions(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index f7e97b6..443fc86 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -40,7 +40,6 @@ Yes, use @,@ to separate the mermaid syntax text and the name of the diagram. - ==== All your outputs have to be in this format: @@ -63,15 +62,15 @@ IF you do not want to call a function, output- call:none:none IF you want to call a function, output in this format: -call|func_name|param -example - call|generate_chart|51 -example - call|generate_mermaid_diagram|graph TD; A-->B; A-->C; B-->D; C-->D;@,@Diagram name +call<|?|>func_name<|?|>param +example - call<|?|>generate_chart<|?|>51 +example - call<|?|>generate_mermaid_diagram<|?|>graph TD; A-->B; A-->C; B-->D; C-->D;@,@Diagram name Remember that these are just examples. Make your own diagrams and charts. Do not be limited to these. Understand that you can call multiple functions at the same time. Every function call must be in a seperate line. -If you do not want to call a function, output- call|none|none +If you do not want to call a function, output- call<|?|>none<|?|>none @@ -153,8 +152,7 @@ FILENAME_SYSTEM_PROMPT = """Take the powerpoint title in the user text and create a short version to be used as a filename for a .pptx file""" -def SLIDE_CREATION_PROMPT(description): - PROMPT = f""" +SLIDE_CREATION_PROMPT = """ Take the user input and create content for a slide in a slide show. You are given description of a single slide. You will generate a title for the slide, content that tells a cohesive story throughout the slide. DO NOT title the slide like 'Slide X: ...'. Make sure the content you write is informative and extensive. But it doesn't need to be too long. Short sentences with good information is the key. diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index cdee0cb..98758d7 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -7,7 +7,8 @@ DECK_CREATION_SYSTEM_PROMPT, TITLE_GEN_SYSTEM_PROMPT, FILENAME_SYSTEM_PROMPT, - TOOL_USE_PROMPT + TOOL_USE_PROMPT, + SLIDE_CREATION_PROMPT ) from .ppt.ppt_creator import PowerPointCreator from .utils.utils import format_simple_message_for_gpt, call_gpt_with_backoff, generate_mermaid_diagram, parse_function_call_output @@ -80,5 +81,68 @@ def create_powerpoint(self, user_input: Union[str, list]) -> str: return filename_response - def _create_powerpoint_from_outline(outline: list) -> str: - pass \ No newline at end of file + def create_powerpoint_from_outline(self, outline: list) -> str: + + deck = [] + for slide in outline: + user_input = slide + data_messages = format_simple_message_for_gpt( + system_message=TOOL_USE_PROMPT, + user_message=f"This is the user input: \n{user_input}\n Analyze this and output in the given format.") + data_response = call_gpt_with_backoff( + messages=data_messages, temperature=0, max_length=MAX_CONTENT_LENGTH) + + print(data_response) + calls = parse_function_call_output(data_response) + print('\n\n',calls) + # TODO(sirri69): THIS CAN BE ABSTRACTED IN A `update_prompt` function + diagrams = [] + for func, param in calls: + if func != "none": + if func == "generate_chart": + best_chart_response = param + user_input += f"\nUse chart type: {best_chart_response}" + elif func == "generate_mermaid_diagram": + mermaid_text, name = param.split("@,@") + generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + diagrams.append(name) + + if diagrams != []: + diagrams = "\n".join([diagram+'.png' for diagram in diagrams]) + + user_input += f""" + We have some diagrams named: + + {diagrams} + + You can use them in your powerpoint.""" + + + # create the slide and append it to the deck + slide_messages = format_simple_message_for_gpt( + system_message=SLIDE_CREATION_PROMPT, user_message=user_input) + slide_response = call_gpt_with_backoff( + messages=slide_messages, temperature=0.2, max_length=MAX_CONTENT_LENGTH) + slide_json = json.loads(slide_response) + deck.append(slide_json) + + title_messages = format_simple_message_for_gpt( + system_message=TITLE_GEN_SYSTEM_PROMPT, user_message=deck) + title_response = call_gpt_with_backoff(messages=title_messages) + title = title_response.replace('"', '') + + filename_message = format_simple_message_for_gpt( + system_message=FILENAME_SYSTEM_PROMPT, user_message=title_response) + filename_response = call_gpt_with_backoff( + messages=filename_message, temperature=0) + filename_response = filename_response.replace('"', '') + + ppt = PowerPointCreator(title=title, slides_content=deck) + ppt.create(file_name=filename_response) + + return filename_response + + + + + diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index 77b888d..26f2588 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -88,8 +88,8 @@ def parse_function_call_output(input_text: str) -> list[str]: functions_calls = text.split("\n") parsed_functions_calls = [] for function_call in functions_calls: - function_name = function_call.split("|")[1] - param = function_call.split("|")[2] + function_name = function_call.split("<|?|>")[1] + param = function_call.split("<|?|>")[2] parsed_functions_calls.append([function_name, param]) diff --git a/test.py b/test.py index 96546e1..43fb04c 100644 --- a/test.py +++ b/test.py @@ -24,12 +24,34 @@ ] +OUTLINE = [ + "Give a title to the presentation: Obesity and Health Insurance Premiums. Talk about how obesity effects health insurance premiums.", + """Talk about how obesity is a growing problem in the US, UK, RU, FR, IT. + here is some data for a chart: + x axis: 2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024 + US: 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40% + UK: 3%, 6%, 9%, 12%, 15%, 18%, 21%, 24% + RU: 2%, 4%, 6%, 8%, 10%, 12%, 14%, 16% + FR: 7%, 14%, 21%, 28%, 35%, 42%, 49%, 56% + IT: 1%, 2%, 3%, 4%, 5%, 6%, 7%, 8% + """, + "Educate the user on the biology of fat cells. Add a diagram about biology of fat cells.", + "Educate the user on how sugar works. Add a diagram about how sugar works.", + "Talk about the problems caused by obesity.", + "Provide an outro on the topic: Obesity and Health Insurance Premiums." +] + + def generate_ppt(): ppt_generator = PowerPointGenerator(OPENAI_API_KEY) powerpoint_files = [ppt_generator.create_powerpoint(user_input=user_text) for user_text in USER_TEXTS] - +def generate_ppt_from_outline(): + ppt_generator = PowerPointGenerator(OPENAI_API_KEY) + powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE) + print(powerpoint_files) if __name__ == "__main__": - generate_ppt() \ No newline at end of file + # generate_ppt() + generate_ppt_from_outline() \ No newline at end of file From 48fb94af1b36710d4f5b6b3a8aae7da10de1a8ce Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 22 Sep 2023 18:11:46 +0530 Subject: [PATCH 10/15] Added individual slide generation with accepting outline as input --- powerpoint_generative_ai/domain/prompts.py | 32 ++++++++++++---------- powerpoint_generative_ai/ppt_generator.py | 11 ++++---- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 443fc86..c90c7f7 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -160,32 +160,32 @@ example 1: { - 'title': 'Slide 1', - 'content': 'This is some content for slide 1' + "title": "Slide 1", + "content": "This is some content for slide 1" } example 2: { - 'title': 'Slide 1', - 'content': 'This slide has a multi-line chart', - 'chart_data': { - 'categories': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul'], - 'series': [ - {'name': 'Series 1', 'values': [1, 2, 3, 4, 5, 6, 7]}, - {'name': 'Series 2', 'values': [2, 3, 4, 5, 6, 7, 8]}, - {'name': 'Series 3', 'values': [3, 4, 5, 6, 7, 8, 9]} + "title": "Slide 1", + "content": "This slide has a multi-line chart", + "chart_data": { + "categories": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul"], + "series": [ + {"name": "Series 1", "values": [1, 2, 3, 4, 5, 6, 7]}, + {"name": "Series 2", "values": [2, 3, 4, 5, 6, 7, 8]}, + {"name": "Series 3", "values": [3, 4, 5, 6, 7, 8, 9]} ] }, - 'chart_type': 4 + "chart_type": 4 } example 3: - + { - 'title': 'Slide 1', - 'content': 'This is some detailed content for slide 1 which goes well with the diagram.', - 'diagram_name': 'Diagram name' + "title": "Slide 1", + "content": "This is some detailed content for slide 1 which goes well with the diagram.", + "diagram_name": "Diagram name" } @@ -197,4 +197,6 @@ But write short sentences with good information. Do not write too much. +Note: Your output must be parsable, valid JSON. ALWAYS OUTPUT VALID JSON. + """ \ No newline at end of file diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 98758d7..585e1f9 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -87,14 +87,13 @@ def create_powerpoint_from_outline(self, outline: list) -> str: for slide in outline: user_input = slide data_messages = format_simple_message_for_gpt( - system_message=TOOL_USE_PROMPT, + system_message=TOOL_USE_PROMPT + "\n\n Not that there is no need to generate a diagram unless user asks you to generate one.", user_message=f"This is the user input: \n{user_input}\n Analyze this and output in the given format.") data_response = call_gpt_with_backoff( messages=data_messages, temperature=0, max_length=MAX_CONTENT_LENGTH) - print(data_response) calls = parse_function_call_output(data_response) - print('\n\n',calls) + # TODO(sirri69): THIS CAN BE ABSTRACTED IN A `update_prompt` function diagrams = [] for func, param in calls: @@ -117,17 +116,19 @@ def create_powerpoint_from_outline(self, outline: list) -> str: You can use them in your powerpoint.""" - # create the slide and append it to the deck slide_messages = format_simple_message_for_gpt( system_message=SLIDE_CREATION_PROMPT, user_message=user_input) slide_response = call_gpt_with_backoff( messages=slide_messages, temperature=0.2, max_length=MAX_CONTENT_LENGTH) + slide_json = json.loads(slide_response) deck.append(slide_json) + + # TODO(sirri69) : Both title and filename can be generated in one call title_messages = format_simple_message_for_gpt( - system_message=TITLE_GEN_SYSTEM_PROMPT, user_message=deck) + system_message=TITLE_GEN_SYSTEM_PROMPT, user_message=str(outline)) title_response = call_gpt_with_backoff(messages=title_messages) title = title_response.replace('"', '') From 914076e8197a7910aa47695637385eb011351abe Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 22 Sep 2023 18:31:07 +0530 Subject: [PATCH 11/15] Added image generation error handling --- powerpoint_generative_ai/ppt_generator.py | 4 +++- powerpoint_generative_ai/utils/utils.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 585e1f9..78a6a2b 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -42,7 +42,9 @@ def create_powerpoint(self, user_input: Union[str, list]) -> str: user_input += f"\nUse chart type: {best_chart_response}" elif func == "generate_mermaid_diagram": mermaid_text, name = param.split("@,@") - generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + resp = generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + if resp is None: + continue diagrams.append(name) if diagrams != []: diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index 26f2588..b194cbd 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -5,6 +5,7 @@ from typing import List import requests import base64, zlib +from PIL import Image def setup_logger(name) -> logging.Logger: """ @@ -107,5 +108,11 @@ def generate_mermaid_diagram(mermaid_text: str, filename: str = "diagram.png"): with open(filename, "wb") as f: f.write(response.content) + + try: + img = Image.open(filename) + except Exception as e: + print(e) + return None return filename From e0ba99791c6a72e0321f3861dd3f1e187d208fc0 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Mon, 25 Sep 2023 19:52:41 +0530 Subject: [PATCH 12/15] Minor bug fixes --- powerpoint_generative_ai/ppt_generator.py | 6 +- powerpoint_generative_ai/utils/utils.py | 12 +- test.py | 148 +++++++++++++++++++++- 3 files changed, 158 insertions(+), 8 deletions(-) diff --git a/powerpoint_generative_ai/ppt_generator.py b/powerpoint_generative_ai/ppt_generator.py index 78a6a2b..126c837 100644 --- a/powerpoint_generative_ai/ppt_generator.py +++ b/powerpoint_generative_ai/ppt_generator.py @@ -46,7 +46,7 @@ def create_powerpoint(self, user_input: Union[str, list]) -> str: if resp is None: continue diagrams.append(name) - + if diagrams != []: diagrams = "\n".join([diagram+'.png' for diagram in diagrams]) @@ -105,7 +105,9 @@ def create_powerpoint_from_outline(self, outline: list) -> str: user_input += f"\nUse chart type: {best_chart_response}" elif func == "generate_mermaid_diagram": mermaid_text, name = param.split("@,@") - generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + resp = generate_mermaid_diagram(mermaid_text=mermaid_text, filename=name+'.png') + if resp is None: + continue diagrams.append(name) if diagrams != []: diff --git a/powerpoint_generative_ai/utils/utils.py b/powerpoint_generative_ai/utils/utils.py index b194cbd..bc98f09 100644 --- a/powerpoint_generative_ai/utils/utils.py +++ b/powerpoint_generative_ai/utils/utils.py @@ -103,13 +103,15 @@ def generate_mermaid_diagram(mermaid_text: str, filename: str = "diagram.png"): """ encoded_mermaid_text = base64.urlsafe_b64encode(zlib.compress(mermaid_text.encode("utf-8"), 9)).decode("ascii") - url = f"https://kroki.io/mermaid/png/{encoded_mermaid_text}" - response = requests.get(url) + + try: + url = f"https://kroki.io/mermaid/png/{encoded_mermaid_text}" + response = requests.get(url) - with open(filename, "wb") as f: - f.write(response.content) + + with open(filename, "wb") as f: + f.write(response.content) - try: img = Image.open(filename) except Exception as e: print(e) diff --git a/test.py b/test.py index 43fb04c..083c2aa 100644 --- a/test.py +++ b/test.py @@ -41,6 +41,152 @@ "Provide an outro on the topic: Obesity and Health Insurance Premiums." ] +OUTLINE2 = [ + """ + Bar graph with 3 bars as depicted below + Only need % and UCL plotted. + Include dotted like at 34.4% “Performance Goal” + UCL % should be included at top of UCL + + Total DRUG +N = 157 + +Proportion of patients with Event +15.9% +(UCL 21.3%) +(LCL 11%) + +======== + +DRUG Subgroup A +N = 41 + +2.4% +(UCL 10.2%) +(LCL 0%) + + +======== + +DRUG Subgroup B +N = 116 + +20.7% +(UCL 27.5%) +(LCL 0%) + + """, + + """ +Title: Incidence of AE TERM Following Treatment with DRUG Sub-title: National Health Reporting System in United States Between January 2021 – January 2022 [please make this look nice – not as important as main title – should be contained in title box though so searchable on iPad] + +Create Horizontal bar graph showing number of events for top 3 age groups (exclude 30+) + +Need to compare males vs females for each age category + +X-axis = Incidence of AE TERM (per 1 Million Doses) + +Please include placeholder footnote as client needs to include + +Since comparisons are between sex within age groups – try to separate age groups with vertical backfill for some other nice element so separated visually + +Data: +Age group,Males,Females +5 to 11,145,63 +12 to 17,456,263 +18 to 29,554,123 +30+,572,326 +""", + +""" +Title: enrollment distribution by aneurysm size and rupture status (ITT) + +Create two side-by-side bar graphs. (1-Left) unruptured (2-right) ruptured. + +Plot %s on Y-Axis by Sac Width on X-axis. Note – you need to calculate %s which is =(x/N)*100 + +Y-axis scales should be the same on both plots and should be in-line (eg 10% should be same on both plots so message not skewed when comparing data on two figures) + +Unruptured and ruptured bars should be different colors – not necessary to distinguish sac widths with different colors + +Include N in legend + +X-axis label = Aneurysm Sac Width (mm) + +Data: + +Extracted data from the table in the image in CSV format: + +Sac width,N,x,n +>3-4,5,141,1 +>4-5,29,141,9 +>5-6,27,141,3 +>6-7,32,141,1 +>7-8,28,141,1 +>8-9,13,141,- +>9-10,6,-,141 +>10-11,-,>11-12,1 +>11-12,1,141,- +""", + +""" +Title: study 910: primary endpoint demonstrates meaningful activity in patients with severe disease + +Create two stacked bar graphs showing ORR and CBR side by side and include the breakdown of responses within each – outlined below + +ORR = Overall Response Rate [spell out below figure] + +PR: partial response +VGPR: very good partial response +sCR: stringent complete response + +Note: you will plot percentages + +No need to label each piece of the stacked rather use color coded legend to distinguish response and number of patients. + +Above this stack – put a text box with overall ORR % and (95% CI) + + +==== + +CBR = Clinical Benefit Rate [spell out below figure] + +MR: minimal response +PR: partial response +VGPR: very good partial response +sCR: stringent complete response + +Note: you will plot percentages + +No need to label each piece of the stacked rather use color coded legend to distinguish response and number of patients. + +Above this stack – put a text box with overall CBR % and (95% CI) + + +====== + +CSV Data: + +Category,ORR a,n (%) +KCP-330-012 mITT (N = 122),31 (25.4),18.0, 34.1 +CBR b,n (%) +48 (39.3),30.6, 48.6 +Best Response +sCR/CR,n (%) +2 (1.6),0.2, 5.8 +GPR,n (%) +6 (4.9),1.8, 10.4 +PR,n (%) +23 (18.9),12.3, 26.9 +MR,n (%) +17 (13.9),8.3, 21.4 +SD,n (%) +48 (39.3),30.6, 48.6 +PD/NE,n (%) +26 (21.3),14.4, 29.6 + +""" +] def generate_ppt(): @@ -49,7 +195,7 @@ def generate_ppt(): def generate_ppt_from_outline(): ppt_generator = PowerPointGenerator(OPENAI_API_KEY) - powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE) + powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE2) print(powerpoint_files) if __name__ == "__main__": From d818b0043ba4ff0bb56953660f2fba7ec47405df Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Wed, 27 Sep 2023 06:09:50 +0530 Subject: [PATCH 13/15] Minor prompt update --- powerpoint_generative_ai/domain/prompts.py | 2 + test.py | 210 ++++++++++++++++++++- 2 files changed, 211 insertions(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index c90c7f7..68117da 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -199,4 +199,6 @@ Note: Your output must be parsable, valid JSON. ALWAYS OUTPUT VALID JSON. +THERE CAN ONLY EITHER BE A CHART OR A DIAGRAM. NOT BOTH. FOLLOW THE FORMAT + """ \ No newline at end of file diff --git a/test.py b/test.py index 083c2aa..4ce2099 100644 --- a/test.py +++ b/test.py @@ -189,13 +189,221 @@ ] +OUTLINE3 = [ + """ +Title: baseline demographics were balanced between groups + +Format table showing: +Age (years), mean (SD) +Female +While +Black or African American +Asian +Hispanic or Latino + +All will be %’s except for age + +Only show Drug / Placebo - Drug on left + +CSV Data: +Category,ORR a,n (%) +KCP-330-012 mITT (N = 122),31 (25.4),18.0, 34.1 +CBR b,n (%) +48 (39.3),30.6, 48.6 +Best Response +sCR/CR,n (%) +2 (1.6),0.2, 5.8 +GPR,n (%) +6 (4.9),1.8, 10.4 +PR,n (%) +23 (18.9),12.3, 26.9 +MR,n (%) +17 (13.9),8.3, 21.4 +SD,n (%) +48 (39.3),30.6, 48.6 +PD/NE,n (%) +26 (21.3),14.4, 29.6 +""", + """ + Title: resistance to multiple oral antibiotics further complicates treatment of cUTI in out-patient setting +Table should show: +Cefuroxime (β-lactam) +Ciprofloxacin (fluroquinolone) +Levofloxacin (fluroquinolone) +Trimethoprim-sulfamethoxazole + +Overarching table header: “Co-Resistant Agent (Class)” [REF - Critchley, 2019] +Show on Right [Column 3] +Show on Left [Column 2] +Bullet at bottom: 1 in 8 patients with cUTI infected with pathogen resistant to ≥ 3 most common classes of antibiotics + +CSV Data: +Agent,Trimethoprim-sulfamethoxazole (N=588),Levofloxacin (N=445) +Cefuroxime,31.3,45.7 +Ceftazidime,15.0,24.7 +Ciprofloxacin,44.2,100 +Levofloxacin,42.5,100 +Doripenem,0.0,0.0 +Ertapenem,0.3,0.5 +Imipenem,0.0,0.0 +Meropenem,0.0,0.0 +Trimethoprim-sulfamethoxazole,100,56.2 + +""", +""" +Bar graph with 3 bars as depicted below + Only need % and UCL plotted. + Include dotted like at 34.4% “Performance Goal” + UCL % should be included at top of UCL + + Total DRUG +N = 157 + +Proportion of patients with Event +15.9% +(UCL 21.3%) +(LCL 11%) + +======== + +DRUG Subgroup A +N = 41 + +2.4% +(UCL 10.2%) +(LCL 0%) + + +======== + +DRUG Subgroup B +N = 116 + +20.7% +(UCL 27.5%) +(LCL 0%) + + +""", +""" +Title: Incidence of AE TERM Following Treatment with DRUG Sub-title: National Health Reporting System in United States Between January 2021 – January 2022 [please make this look nice – not as important as main title – should be contained in title box though so searchable on iPad] + +Create Horizontal bar graph showing number of events for top 3 age groups (exclude 30+) + +Need to compare males vs females for each age category + +X-axis = Incidence of AE TERM (per 1 Million Doses) + +Please include placeholder footnote as client needs to include + +Since comparisons are between sex within age groups – try to separate age groups with vertical backfill for some other nice element so separated visually + +Data: +Age group,Males,Females +5 to 11,145,63 +12 to 17,456,263 +18 to 29,554,123 +30+,572,326 +""", +""" +Title: enrollment distribution by aneurysm size and rupture status (ITT) + +Create two side-by-side bar graphs. (1-Left) unruptured (2-right) ruptured. + +Plot %s on Y-Axis by Sac Width on X-axis. Note – you need to calculate %s which is =(x/N)*100 + +Y-axis scales should be the same on both plots and should be in-line (eg 10% should be same on both plots so message not skewed when comparing data on two figures) + +Unruptured and ruptured bars should be different colors – not necessary to distinguish sac widths with different colors + +Include N in legend + +X-axis label = Aneurysm Sac Width (mm) + +Data: + +Extracted data from the table in the image in CSV format: + +Sac width,N,x,n +>3-4,5,141,1 +>4-5,29,141,9 +>5-6,27,141,3 +>6-7,32,141,1 +>7-8,28,141,1 +>8-9,13,141,- +>9-10,6,-,141 +>10-11,-,>11-12,1 +>11-12,1,141,- +""", +""" +Title: study 910: primary endpoint demonstrates meaningful activity in patients with severe disease + +Create two stacked bar graphs showing ORR and CBR side by side and include the breakdown of responses within each – outlined below + +ORR = Overall Response Rate [spell out below figure] + +PR: partial response +VGPR: very good partial response +sCR: stringent complete response + +Note: you will plot percentages + +No need to label each piece of the stacked rather use color coded legend to distinguish response and number of patients. + +Above this stack – put a text box with overall ORR % and (95% CI) + + +==== + +CBR = Clinical Benefit Rate [spell out below figure] + +MR: minimal response +PR: partial response +VGPR: very good partial response +sCR: stringent complete response + +Note: you will plot percentages + +No need to label each piece of the stacked rather use color coded legend to distinguish response and number of patients. + +Above this stack – put a text box with overall CBR % and (95% CI) + + +====== + +CSV Data: + +Category,ORR a,n (%) +KCP-330-012 mITT (N = 122),31 (25.4),18.0, 34.1 +CBR b,n (%) +48 (39.3),30.6, 48.6 +Best Response +sCR/CR,n (%) +2 (1.6),0.2, 5.8 +GPR,n (%) +6 (4.9),1.8, 10.4 +PR,n (%) +23 (18.9),12.3, 26.9 +MR,n (%) +17 (13.9),8.3, 21.4 +SD,n (%) +48 (39.3),30.6, 48.6 +PD/NE,n (%) +26 (21.3),14.4, 29.6 + +""" + + +] + + def generate_ppt(): ppt_generator = PowerPointGenerator(OPENAI_API_KEY) powerpoint_files = [ppt_generator.create_powerpoint(user_input=user_text) for user_text in USER_TEXTS] def generate_ppt_from_outline(): ppt_generator = PowerPointGenerator(OPENAI_API_KEY) - powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE2) + powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE3) print(powerpoint_files) if __name__ == "__main__": From 11496ee2c6159798fd52dcc2d3855753d66b4283 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Thu, 28 Sep 2023 18:15:41 +0530 Subject: [PATCH 14/15] Added `_csv_to_table` --- powerpoint_generative_ai/domain/prompts.py | 5 ++++- powerpoint_generative_ai/ppt/ppt_creator.py | 24 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 68117da..2337e3b 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -184,7 +184,7 @@ { "title": "Slide 1", - "content": "This is some detailed content for slide 1 which goes well with the diagram.", + "content": "This is some detailed content for slide 1. Informative.", "diagram_name": "Diagram name" } @@ -197,6 +197,9 @@ But write short sentences with good information. Do not write too much. +DO NOT SAY THINGS LIKE "This slide has" or "This slide contains", just write the content. +Be informative, no need to mention what the slide contains, just write directly. + Note: Your output must be parsable, valid JSON. ALWAYS OUTPUT VALID JSON. THERE CAN ONLY EITHER BE A CHART OR A DIAGRAM. NOT BOTH. FOLLOW THE FORMAT diff --git a/powerpoint_generative_ai/ppt/ppt_creator.py b/powerpoint_generative_ai/ppt/ppt_creator.py index 24b5e1e..65798db 100644 --- a/powerpoint_generative_ai/ppt/ppt_creator.py +++ b/powerpoint_generative_ai/ppt/ppt_creator.py @@ -79,6 +79,7 @@ def add_chart(self, data: dict, slide: Slide, x: Inches = Inches(4.75), y: Inche chart_data.add_series(series['name'], series['values']) chart = slide.shapes.add_chart(chart_type, x, y, cx, cy, chart_data).chart + chart.has_legend = True for series in chart.series: series.has_data_labels = True @@ -91,3 +92,26 @@ def save(self, file_name: str): self.presentation.save(file_name) self.logger.info(f"Presentation successfully created: {file_name}") + + + def _csv_to_table(table_placeholder, csv_text): + # ---split the csv text into rows and columns by commas and newlines--- + rows = csv_text.split('\n') + data = [row.split(',') for row in rows] + + # ---get the number of rows and columns from the data--- + row_count = len(data) + col_count = len(data[0]) + + # ---insert a table into the placeholder with the same size as the data--- + table_shape = table_placeholder.insert_table(row_count, col_count) + table = table_shape.table + + # ---iterate over the data and assign each value to the corresponding cell--- + for r in range(row_count): + for c in range(col_count): + cell = table.cell(r, c) + cell.text = data[r][c] + + # ---return the table shape object--- + return table_shape From 04d3beec5792f6fa80e9f69b6aefdf790dd0eec0 Mon Sep 17 00:00:00 2001 From: Sirri69 Date: Fri, 29 Sep 2023 02:23:30 +0530 Subject: [PATCH 15/15] Added Table Generation support --- powerpoint_generative_ai/domain/prompts.py | 19 ++++++++- powerpoint_generative_ai/ppt/ppt_creator.py | 43 ++++++++++++++------- test.py | 8 +++- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/powerpoint_generative_ai/domain/prompts.py b/powerpoint_generative_ai/domain/prompts.py index 2337e3b..3a4ca7d 100644 --- a/powerpoint_generative_ai/domain/prompts.py +++ b/powerpoint_generative_ai/domain/prompts.py @@ -188,9 +188,20 @@ "diagram_name": "Diagram name" } +example 4: + + { + "title": "Slide 1", + "content": "This is some detailed content for slide 1. Informative and detailed.", + "table_data": "Table data" // NOTE: This must be CSV data properly formatted. Must be parsable. + } -You will be provided diagram_name and chart_data when necessary. + +VERY IMPORTANT: CSV DATA MUST HAVE SAME NUMBER OF COLUMNS IN EACH ROW. IF NOT, THE CODE WILL BREAK. The CSV must be parsable. Format the data properly. + +You will be provided diagram_name and chart_data and table_data when necessary. +If you see some csv data, generate a table. But if user has provided some data for a chart, use that data to generate a chart. If user asked for a chart, then generate a chart. Note that you are an expert, you must write excellent content. You must always output in the given format. Even if other outputs are different. Be engaging and always provide new information. Do not summarize what the slide is about, the content on each slide should be meaningful information. @@ -202,6 +213,10 @@ Note: Your output must be parsable, valid JSON. ALWAYS OUTPUT VALID JSON. -THERE CAN ONLY EITHER BE A CHART OR A DIAGRAM. NOT BOTH. FOLLOW THE FORMAT +GENERATE PROPER CSV DATA FOR TABLES when required. +Again, if you see csv data, generate a table. + +THERE CAN ONLY EITHER BE A CHART OR A DIAGRAM OR A TABLE. NOT MULTIPLE, ONLY ONE OF THEM. FOLLOW THE FORMAT. +Given CSV data, generate a table. """ \ No newline at end of file diff --git a/powerpoint_generative_ai/ppt/ppt_creator.py b/powerpoint_generative_ai/ppt/ppt_creator.py index 65798db..7ec6cf1 100644 --- a/powerpoint_generative_ai/ppt/ppt_creator.py +++ b/powerpoint_generative_ai/ppt/ppt_creator.py @@ -3,7 +3,7 @@ from pptx.slide import Slide from pptx.chart.data import CategoryChartData from pptx.enum.chart import XL_CHART_TYPE -from pptx.util import Inches +from pptx.util import Inches, Pt from powerpoint_generative_ai.utils.utils import setup_logger @@ -47,12 +47,16 @@ def add_slide(self, content: dict): text_content = content.get('content', None) chart_data = content.get('chart_data', None) image_path = content.get('diagram_name', None) + table_data = content.get('table_data', None) + LAYOUT = SLIDE_LAYOUTS['Title Slide'] - if text_content and (chart_data or image_path): + if table_data: + LAYOUT = SLIDE_LAYOUTS['Title Only'] + elif text_content and (chart_data or image_path): LAYOUT = SLIDE_LAYOUTS['Two Content'] # Text column and blank right side elif chart_data and not text_content: LAYOUT = SLIDE_LAYOUTS['Title Only'] # Just a title for a big chart - elif text_content and not chart_data: + elif (text_content and not chart_data): LAYOUT = SLIDE_LAYOUTS['Title and Content'] # Standard slide format slide_layout = self.presentation.slide_layouts[LAYOUT] @@ -60,6 +64,13 @@ def add_slide(self, content: dict): title = slide.shapes.title title.text = content.get('title', None) + + if table_data: + title.text = text_content + title.text_frame.paragraphs[0].font.size = Pt(13) + self.add_table(slide=slide, csv_text=table_data) + return + if text_content: content_shape = slide.shapes.placeholders[1] content_shape.text = text_content @@ -71,6 +82,7 @@ def add_slide(self, content: dict): self.add_image(image_path=image_path, slide=slide) + def add_chart(self, data: dict, slide: Slide, x: Inches = Inches(4.75), y: Inches = Inches(2), cx: Inches=Inches(5.5), cy: Inches = Inches(4.5), chart_type: int = XL_CHART_TYPE.COLUMN_CLUSTERED): """Creates a chart and adds it to the current slide""" chart_data = CategoryChartData() @@ -88,30 +100,31 @@ def add_image(self, image_path: str, slide: Slide, x: Inches = Inches(4.75), y: """Adds an image to the current slide""" slide.shapes.add_picture(image_path, x, y, cx, cy) + def add_table(self, slide: Slide, csv_text: str): + """Adds a table to the current slide""" + + shapes = slide.shapes + self._csv_to_table(shapes, csv_text) + def save(self, file_name: str): self.presentation.save(file_name) self.logger.info(f"Presentation successfully created: {file_name}") - - - def _csv_to_table(table_placeholder, csv_text): - # ---split the csv text into rows and columns by commas and newlines--- + def _csv_to_table(self, shapes, csv_text): rows = csv_text.split('\n') data = [row.split(',') for row in rows] - # ---get the number of rows and columns from the data--- row_count = len(data) col_count = len(data[0]) - # ---insert a table into the placeholder with the same size as the data--- - table_shape = table_placeholder.insert_table(row_count, col_count) - table = table_shape.table + left = top = Inches(2.0) + width = Inches(6.0) + height = Inches(0.8) + + + table = shapes.add_table(row_count, col_count, left, top, width, height).table - # ---iterate over the data and assign each value to the corresponding cell--- for r in range(row_count): for c in range(col_count): cell = table.cell(r, c) cell.text = data[r][c] - - # ---return the table shape object--- - return table_shape diff --git a/test.py b/test.py index 4ce2099..b8267f3 100644 --- a/test.py +++ b/test.py @@ -205,7 +205,7 @@ Only show Drug / Placebo - Drug on left -CSV Data: +CSV Data, generate a table: Category,ORR a,n (%) KCP-330-012 mITT (N = 122),31 (25.4),18.0, 34.1 CBR b,n (%) @@ -223,6 +223,10 @@ 48 (39.3),30.6, 48.6 PD/NE,n (%) 26 (21.3),14.4, 29.6 + +========= + +Generate a table using the data above. Do not generate a graph. """, """ Title: resistance to multiple oral antibiotics further complicates treatment of cUTI in out-patient setting @@ -403,7 +407,7 @@ def generate_ppt(): def generate_ppt_from_outline(): ppt_generator = PowerPointGenerator(OPENAI_API_KEY) - powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=OUTLINE3) + powerpoint_files = ppt_generator.create_powerpoint_from_outline(outline=[OUTLINE3[0]]) print(powerpoint_files) if __name__ == "__main__":