From cf4852b04fced70df3c922d7ce9740bf5ae75034 Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Fri, 10 May 2024 14:16:56 +0000
Subject: [PATCH 1/9] Import logger

---
 bertopic/representation/_openai.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index e84a75f1..0d026784 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -5,8 +5,9 @@
 from scipy.sparse import csr_matrix
 from typing import Mapping, List, Tuple, Any, Union, Callable
 from bertopic.representation._base import BaseRepresentation
-from bertopic.representation._utils import retry_with_exponential_backoff, truncate_document
+from bertopic.representation._utils import retry_with_exponential_backoff, truncate_document, MyLogger
+
+logger = MyLogger("WARNING")
 
 DEFAULT_PROMPT = """
 This is a list of texts where each collection of texts describe a topic. After each collection of texts, the name of the topic they represent is mentioned as a short-highly-descriptive title
@@ -37,7 +38,7 @@
 Topic name:"""
 
 DEFAULT_CHAT_PROMPT = """
-I have a topic that contains the following documents: 
+I have a topic that contains the following documents:
 [DOCUMENTS]
 The topic is described by the following keywords: [KEYWORDS]

From 69179b3002f6e5dd1510e4639447680c9753760a Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Fri, 10 May 2024 14:24:30 +0000
Subject: [PATCH 2/9] Handle finish reasons

---
 bertopic/representation/_openai.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index 0d026784..055f5d92 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -218,10 +218,13 @@ def extract_topics(self,
                 else:
                     response = self.client.chat.completions.create(**kwargs)
 
-                # Check whether content was actually generated
-                # Addresses #1570 for potential issues with OpenAI's content filter
-                if hasattr(response.choices[0].message, "content"):
-                    label = response.choices[0].message.content.strip().replace("topic: ", "")
+                choice = response.choices[0]
+                has_content = hasattr(response.choices[0].message, "content")
+
+                if choice.finish_reason == "stop" and has_content:
+                    label = choice.message.content.strip().replace("topic: ", "")
+                elif choice.finish_reason == "length" and has_content:
+                    label = choice.message.content.strip().replace("topic: ", "")
                 else:
                     label = "No label returned"
             else:
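For context on the branch added in patch 2: with the v1 OpenAI Python client, each chat completion choice carries a finish_reason that is "stop" when generation completed normally, "length" when the token limit cut it off, or "content_filter" when OpenAI's moderation layer withheld the output. A minimal, self-contained sketch of inspecting it (the model name and prompt are illustrative, not taken from the patches):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model name
        messages=[{"role": "user", "content": "Give a short label for: dog, cat, pet"}],
        max_tokens=20,
    )

    choice = response.choices[0]
    # "stop" = finished normally, "length" = truncated by max_tokens,
    # "content_filter" = withheld by OpenAI's moderation layer.
    print(choice.finish_reason, choice.message.content)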
From acba986bbc9c5bc714e0fcb438b8c7e186f2621a Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Fri, 10 May 2024 14:47:52 +0000
Subject: [PATCH 3/9] Add logging for each finish_reason

---
 bertopic/representation/_openai.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index 055f5d92..f4eba317 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -194,7 +194,7 @@ def extract_topics(self,
             updated_topics: Updated topic representations
         """
         # Extract the top n representative documents per topic
-        repr_docs_mappings, _, _, _ = topic_model._extract_representative_docs(c_tf_idf, documents, topics, 500, self.nr_docs, self.diversity)
+        repr_docs_mappings, _, _, repr_doc_ids = topic_model._extract_representative_docs(c_tf_idf, documents, topics, 500, self.nr_docs, self.diversity)
 
         # Generate using OpenAI's Language Model
         updated_topics = {}
@@ -219,13 +219,20 @@ def extract_topics(self,
                     response = self.client.chat.completions.create(**kwargs)
 
                 choice = response.choices[0]
-                has_content = hasattr(response.choices[0].message, "content")
-
-                if choice.finish_reason == "stop" and has_content:
-                    label = choice.message.content.strip().replace("topic: ", "")
-                elif choice.finish_reason == "length" and has_content:
-                    label = choice.message.content.strip().replace("topic: ", "")
+
+                if choice.finish_reason == "stop":
+                    label = choice.message.content.strip().replace("topic: ", "")
+                elif choice.finish_reason == "length":
+                    logger.warning(f"Extracting Topics - Length limit reached for doc_ids ({repr_doc_ids})")
+                    if hasattr(response.choices[0].message, "content"):
+                        label = choice.message.content.strip().replace("topic: ", "")
+                    else:
+                        label = "Incomple output due to token limit being reached"
+                elif choice.finish_reason == "content_filter":
+                    logger.warning(f"Extracting Topics - Content filtered for doc_ids ({repr_doc_ids})")
+                    label = "Output content filtered by OpenAI"
                 else:
+                    logger.warning(f"Extracting Topics - No label due to finish_reason {choice.finish_reason} for doc_ids ({repr_doc_ids})")
                     label = "No label returned"
             else:
                 if self.exponential_backoff:

From 335338ec9c36e812371d4e6a8ad3a56317246221 Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Wed, 22 May 2024 15:21:30 +0000
Subject: [PATCH 4/9] Rename choice to output

- Replace all uses of choices[0] with output
---
 bertopic/representation/_openai.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index f4eba317..1f35e2c6 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -218,28 +218,28 @@ def extract_topics(self,
                 else:
                     response = self.client.chat.completions.create(**kwargs)
 
-                choice = response.choices[0]
+                output = response.choices[0]
 
-                if choice.finish_reason == "stop":
-                    label = choice.message.content.strip().replace("topic: ", "")
-                elif choice.finish_reason == "length":
+                if output.finish_reason == "stop":
+                    label = output.message.content.strip().replace("topic: ", "")
+                elif output.finish_reason == "length":
                     logger.warning(f"Extracting Topics - Length limit reached for doc_ids ({repr_doc_ids})")
-                    if hasattr(response.choices[0].message, "content"):
-                        label = choice.message.content.strip().replace("topic: ", "")
+                    if hasattr(output.message, "content"):
+                        label = output.message.content.strip().replace("topic: ", "")
                     else:
                         label = "Incomple output due to token limit being reached"
-                elif choice.finish_reason == "content_filter":
+                elif output.finish_reason == "content_filter":
                     logger.warning(f"Extracting Topics - Content filtered for doc_ids ({repr_doc_ids})")
                     label = "Output content filtered by OpenAI"
                 else:
-                    logger.warning(f"Extracting Topics - No label due to finish_reason {choice.finish_reason} for doc_ids ({repr_doc_ids})")
+                    logger.warning(f"Extracting Topics - No label due to finish_reason {output.finish_reason} for doc_ids ({repr_doc_ids})")
                     label = "No label returned"
             else:
                 if self.exponential_backoff:
                     response = completions_with_backoff(self.client, model=self.model, prompt=prompt, **self.generator_kwargs)
                 else:
                     response = self.client.completions.create(model=self.model, prompt=prompt, **self.generator_kwargs)
-                label = response.choices[0].text.strip()
+                label = output.text.strip()
 
             updated_topics[topic] = [(label, 1)]
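Note that the rename in patch 4 reaches one line too far: output is only ever bound in the chat branch, so the non-chat branch's label = output.text.strip() would raise a NameError at runtime. Patch 9 at the end of the series points that line back at response. A sketch of the corrected non-chat flow (model and prompt are placeholders):

    from openai import OpenAI

    client = OpenAI()

    # Legacy completions endpoint, used when self.chat is False.
    response = client.completions.create(
        model="gpt-3.5-turbo-instruct",  # placeholder legacy model
        prompt="Give a short topic label for: dog, cat, pet",
    )
    # No `output` variable exists on this path; read from `response` directly.
    label = response.choices[0].text.strip()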
From c064bda894838d1bc99305ab5977fce995b9de38 Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Wed, 22 May 2024 15:49:10 +0000
Subject: [PATCH 5/9] Fix typo

---
 bertopic/representation/_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index 1f35e2c6..f70acb58 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -227,7 +227,7 @@ def extract_topics(self,
                     if hasattr(output.message, "content"):
                         label = output.message.content.strip().replace("topic: ", "")
                     else:
-                        label = "Incomple output due to token limit being reached"
+                        label = "Incomplete output due to token limit being reached"
                 elif output.finish_reason == "content_filter":
                     logger.warning(f"Extracting Topics - Content filtered for doc_ids ({repr_doc_ids})")
                     label = "Output content filtered by OpenAI"

From 8a97198cf4db443bfa110e2c9f6192e272f1707e Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Wed, 22 May 2024 15:49:45 +0000
Subject: [PATCH 6/9] Use requested logging prefix

---
 bertopic/representation/_openai.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index f70acb58..e49a4d09 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -223,16 +223,16 @@ def extract_topics(self,
                 if output.finish_reason == "stop":
                     label = output.message.content.strip().replace("topic: ", "")
                 elif output.finish_reason == "length":
-                    logger.warning(f"Extracting Topics - Length limit reached for doc_ids ({repr_doc_ids})")
+                    logger.warning(f"OpenAI Topic Representation - Length limit reached for doc_ids ({repr_doc_ids})")
                     if hasattr(output.message, "content"):
                         label = output.message.content.strip().replace("topic: ", "")
                     else:
-                        label = "Incomplete output due to token limit being reached"
+                        label = "OpenAI Topic Representation - Incomplete output due to token limit being reached"
                 elif output.finish_reason == "content_filter":
-                    logger.warning(f"Extracting Topics - Content filtered for doc_ids ({repr_doc_ids})")
+                    logger.warning(f"OpenAI Topic Representation - Content filtered for doc_ids ({repr_doc_ids})")
                     label = "Output content filtered by OpenAI"
                 else:
-                    logger.warning(f"Extracting Topics - No label due to finish_reason {output.finish_reason} for doc_ids ({repr_doc_ids})")
+                    logger.warning(f"OpenAI Topic Representation - No label due to finish_reason {output.finish_reason} for doc_ids ({repr_doc_ids})")
                     label = "No label returned"
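Where these warnings end up is governed by standard Python logging. A generic sketch of surfacing them, assuming BERTopic registers its logger under the "BERTopic" name (as the MyLogger wrapper elsewhere in the library suggests):

    import logging

    # Route WARNING-level records, including the ones added above, to stderr.
    logging.basicConfig(format="%(asctime)s - %(name)s - %(message)s")
    logging.getLogger("BERTopic").setLevel(logging.WARNING)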
logger.warn(f"OpenAI Topic Representation - Content filtered for doc_ids ({repr_doc_ids})") + logger.warn(f"OpenAI Topic Representation - The content filter of OpenAI was trigger for the following documents IDs: ({repr_doc_ids})") label = "Output content filtered by OpenAI" else: - logger.warn(f"OpenAI Topic Representation - No label due to finish_reason {output.finish_reason} for doc_ids ({repr_doc_ids})") + logger.warn(f"OpenAI Topic Representation - Couldn't create a label due to {output.finish_reason} for the following document IDs: ({repr_doc_ids})") label = "No label returned" else: if self.exponential_backoff: From cf020ec6250f0476111daec5d931b56c7d9f768f Mon Sep 17 00:00:00 2001 From: Steven Solomon Date: Fri, 14 Jun 2024 14:31:08 +0000 Subject: [PATCH 8/9] Use logger from topic_model --- bertopic/representation/_openai.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py index 6dc4da61..c04b2684 100644 --- a/bertopic/representation/_openai.py +++ b/bertopic/representation/_openai.py @@ -5,9 +5,7 @@ from scipy.sparse import csr_matrix from typing import Mapping, List, Tuple, Any, Union, Callable from bertopic.representation._base import BaseRepresentation -from bertopic.representation._utils import retry_with_exponential_backoff, truncate_document, MyLogger - -logger = MyLogger("WARNING") +from bertopic.representation._utils import retry_with_exponential_backoff, truncate_document DEFAULT_PROMPT = """ This is a list of texts where each collection of texts describe a topic. After each collection of texts, the name of the topic they represent is mentioned as a short-highly-descriptive title @@ -223,17 +221,17 @@ def extract_topics(self, if output.finish_reason == "stop": label = output.message.content.strip().replace("topic: ", "") elif output.finish_reason == "length": - logger.warn(f"OpenAI Topic Representation - Length limit reached for documents IDs: ({repr_doc_ids})") + topic_model.logger.warn(f"OpenAI Topic Representation - Length limit reached for documents IDs: ({repr_doc_ids})") if hasattr(output.message, "content"): label = output.message.content.strip().replace("topic: ", "") else: label = "OpenAI Topic Representation - Incomplete output due to token limit being reached" # Addresses #1570 for potential issues with OpenAI's content filter elif output.finish_reason == "content_filter": - logger.warn(f"OpenAI Topic Representation - The content filter of OpenAI was trigger for the following documents IDs: ({repr_doc_ids})") + topic_model.logger.warn(f"OpenAI Topic Representation - The content filter of OpenAI was trigger for the following documents IDs: ({repr_doc_ids})") label = "Output content filtered by OpenAI" else: - logger.warn(f"OpenAI Topic Representation - Couldn't create a label due to {output.finish_reason} for the following document IDs: ({repr_doc_ids})") + topic_model.logger.warn(f"OpenAI Topic Representation - Couldn't create a label due to {output.finish_reason} for the following document IDs: ({repr_doc_ids})") label = "No label returned" else: if self.exponential_backoff: From d47a928b8590128f6eca928d1ce4c3f7cf151bf9 Mon Sep 17 00:00:00 2001 From: Steven Solomon Date: Fri, 14 Jun 2024 14:36:19 +0000 Subject: [PATCH 9/9] Fix unused response for non-chat flow --- bertopic/representation/_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py index c04b2684..7619e834 
From d47a928b8590128f6eca928d1ce4c3f7cf151bf9 Mon Sep 17 00:00:00 2001
From: Steven Solomon
Date: Fri, 14 Jun 2024 14:36:19 +0000
Subject: [PATCH 9/9] Fix unused response for non-chat flow

---
 bertopic/representation/_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bertopic/representation/_openai.py b/bertopic/representation/_openai.py
index c04b2684..7619e834 100644
--- a/bertopic/representation/_openai.py
+++ b/bertopic/representation/_openai.py
@@ -238,7 +238,7 @@ def extract_topics(self,
                     response = completions_with_backoff(self.client, model=self.model, prompt=prompt, **self.generator_kwargs)
                 else:
                     response = self.client.completions.create(model=self.model, prompt=prompt, **self.generator_kwargs)
-                label = output.text.strip()
+                label = response.choices[0].text.strip()
 
             updated_topics[topic] = [(label, 1)]
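Taken together, the nine patches leave the chat branch of extract_topics reading approximately as follows (reconstructed by applying the diffs above in order; surrounding code elided):

    output = response.choices[0]

    if output.finish_reason == "stop":
        label = output.message.content.strip().replace("topic: ", "")
    elif output.finish_reason == "length":
        topic_model.logger.warning(f"OpenAI Topic Representation - Length limit reached for document IDs: ({repr_doc_ids})")
        if hasattr(output.message, "content"):
            label = output.message.content.strip().replace("topic: ", "")
        else:
            label = "OpenAI Topic Representation - Incomplete output due to token limit being reached"
    # Addresses #1570 for potential issues with OpenAI's content filter
    elif output.finish_reason == "content_filter":
        topic_model.logger.warning(f"OpenAI Topic Representation - The content filter of OpenAI was triggered for the following document IDs: ({repr_doc_ids})")
        label = "Output content filtered by OpenAI"
    else:
        topic_model.logger.warning(f"OpenAI Topic Representation - Couldn't create a label due to {output.finish_reason} for the following document IDs: ({repr_doc_ids})")
        label = "No label returned"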