From 12dbbdbca1960750d4f72a698e0be47ca7b8d0ec Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Tue, 27 May 2025 14:05:02 -0400 Subject: [PATCH 01/12] Version bump to 10.5 --- aimon/_version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aimon/_version.py b/aimon/_version.py index 23f5cd5..938f4f9 100644 --- a/aimon/_version.py +++ b/aimon/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "aimon" -__version__ = "0.10.4" +__version__ = "0.10.5" diff --git a/setup.py b/setup.py index c2ecb17..d1299cc 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ name='aimon', python_requires='>3.8.0', packages=find_packages(), - version="0.10.4", + version="0.10.5", install_requires=[ "annotated-types~=0.6.0", "anyio~=4.9.0", From c80d5ca027196138a7b4ce25dba8eb2fa500a4b9 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 15:55:50 -0400 Subject: [PATCH 02/12] Added yml file to automatically run tests when a PR is opened, synchronized (updated), or reopened that targets the branch --- .github/workflows/test-on-pr.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/test-on-pr.yml diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml new file mode 100644 index 0000000..da032c3 --- /dev/null +++ b/.github/workflows/test-on-pr.yml @@ -0,0 +1,31 @@ +name: Test on Pull Request + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + + - name: Run tests + # env: + # AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} + run: | + pytest tests/ From 18f0c88246d76836bda8fa67365e9efae272c3ff Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 16:10:30 -0400 Subject: [PATCH 03/12] Updated pip install packages --- .github/workflows/test-on-pr.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index da032c3..e7a3102 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -21,8 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest + pip install pytest setuptools wheel twine - name: Run tests # env: From b993eb7016e550830ba05c4168ef02247dd8c29f Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 16:12:46 -0400 Subject: [PATCH 04/12] pip install . --- .github/workflows/test-on-pr.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index e7a3102..ba03860 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -21,7 +21,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest setuptools wheel twine + pip install . + pip install pytest - name: Run tests # env: From 48dcefaa35de24f736f4a9af5a740edb8af151f4 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 16:17:28 -0400 Subject: [PATCH 05/12] Uncommenting API Key env var --- .github/workflows/test-on-pr.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index ba03860..cdc2361 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -25,7 +25,7 @@ jobs: pip install pytest - name: Run tests - # env: - # AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} + env: + AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} run: | pytest tests/ From 5841ebe45c21102b0cc7837e89047612ea8c2503 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 17:18:50 -0400 Subject: [PATCH 06/12] Ignoring obsolete and low level tests --- .github/workflows/test-on-pr.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index cdc2361..4f98639 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -28,4 +28,5 @@ jobs: env: AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} run: | - pytest tests/ + # pytest tests/test_detect.py tests/test_evaluate.py + pytest tests/ --ignore=tests/obsolete/ --ignore=tests/test_low_level_api.py -v From e34cebc77730910ba40c72c0fbdc0f06683b4235 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 17:46:13 -0400 Subject: [PATCH 07/12] Added descriptive logging to tests --- .github/workflows/test-on-pr.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index 4f98639..9c3eddc 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -28,5 +28,12 @@ jobs: env: AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} run: | + + ## Run only two test files # pytest tests/test_detect.py tests/test_evaluate.py - pytest tests/ --ignore=tests/obsolete/ --ignore=tests/test_low_level_api.py -v + + ## Ignore some files and run without descriptive logging + # pytest tests/ --ignore=tests/obsolete/ --ignore=tests/test_low_level_api.py -v + + ## Ignore some files and run with descriptive logging + pytest tests/ --ignore=tests/obsolete/ --ignore=tests/test_low_level_api.py -v --log-cli-level=INFO From 21b8631e406a218d7a3aecaa6059b7dc4ec3e038 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 17:53:22 -0400 Subject: [PATCH 08/12] Debugging yml file and requirements modified for streamlit chatbot example --- .github/workflows/test-on-pr.yml | 6 ++++++ examples/streamlit_apps/chatbot/requirements.txt | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-on-pr.yml b/.github/workflows/test-on-pr.yml index 9c3eddc..9a72962 100644 --- a/.github/workflows/test-on-pr.yml +++ b/.github/workflows/test-on-pr.yml @@ -24,6 +24,12 @@ jobs: pip install . pip install pytest + - name: Debug env var + env: + AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} + run: | + echo "API KEY LENGTH: ${#AIMON_API_KEY}" + - name: Run tests env: AIMON_API_KEY: ${{ secrets.AIMON_API_KEY }} diff --git a/examples/streamlit_apps/chatbot/requirements.txt b/examples/streamlit_apps/chatbot/requirements.txt index 5665f53..46d3b2c 100644 --- a/examples/streamlit_apps/chatbot/requirements.txt +++ b/examples/streamlit_apps/chatbot/requirements.txt @@ -1,5 +1,5 @@ llama-index llama-index-readers-web streamlit -aimon>=0.5.0 - +aimon>=0.10.0 +httpx<0.28.1 \ No newline at end of file From fc77b91c9d2123466fe733c4bb4c910d836faf78 Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Thu, 29 May 2025 18:11:56 -0400 Subject: [PATCH 09/12] Adjusted streamlit chatbot to be compatible with IA2 --- .../chatbot/aimon_chatbot_demo.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/examples/streamlit_apps/chatbot/aimon_chatbot_demo.py b/examples/streamlit_apps/chatbot/aimon_chatbot_demo.py index 8b1a6dc..e8c49a1 100644 --- a/examples/streamlit_apps/chatbot/aimon_chatbot_demo.py +++ b/examples/streamlit_apps/chatbot/aimon_chatbot_demo.py @@ -43,7 +43,7 @@ def load_data(): system_prompt="""You are an expert on answering questions on Essays and your job is to answer questions related to this domain. Your answers should be based on - facts – do not hallucinate features.""", + facts - do not hallucinate features.""", ) logging.info("Finished creating OpenAI LLM...") Settings.chunk_size = 256 @@ -107,10 +107,12 @@ def execute(): openai_api_key = os.getenv("OPENAI_API_KEY") openai.api_key = openai_api_key - instructions = st.text_input( - "Instructions for the chatbot. Ex: Answer the user's question in a professional tone.", - value="Answer the user's question in a professional tone." + raw_instructions = st.text_input( + "Instructions for the chatbot (comma-separated). Ex: Answer professionally, Be concise", + value="Answer professionally, Be concise" ) + instructions = [instr.strip() for instr in raw_instructions.split(',') if instr.strip()] + st.title("Ask questions on Paul Graham's Work Experience") if "messages" not in st.session_state.keys(): # Initialize the chat messages history @@ -125,16 +127,19 @@ def execute(): memory = ChatMemoryBuffer.from_defaults(token_limit=1200) if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine + formatted_instructions = "; ".join(instructions) if instructions else "Respond helpfully." + + context_prompt = ( + "You are a chatbot, able to answer questions on an essay about Paul Graham's Work experience. " + "Here are the relevant documents for the context:\n" + "{context_str}\n\n" + f"Instruction: Use the previous chat history, or the context above, to interact and help the user. {formatted_instructions}" + ) + st.session_state.chat_engine = index.as_chat_engine( chat_mode="condense_plus_context", memory=memory, - context_prompt=( - "You are a chatbot, able to answer questions on an essay about Paul Graham's Work experience." - "Here are the relevant documents for the context:\n" - "{context_str}" - "\nInstruction: Use the previous chat history, or the context above, to interact and help the user. " + - "{}".format(instructions if instructions else "") - ), + context_prompt=context_prompt, verbose=False, similarity_top_k=4, ) From c6309fdef297e96fc1a20597f35e76f62e8bcf4b Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Fri, 27 Jun 2025 12:43:34 -0400 Subject: [PATCH 10/12] Added `must_compute` flag to Detect --- aimon/decorators/detect.py | 15 +++++- aimon/types/inference_detect_params.py | 10 ++++ tests/test_detect.py | 74 ++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/aimon/decorators/detect.py b/aimon/decorators/detect.py index 99c7acf..29ac6fd 100644 --- a/aimon/decorators/detect.py +++ b/aimon/decorators/detect.py @@ -91,6 +91,8 @@ class Detect: The name of the application to use when publish is True. model_name : str, optional The name of the model to use when publish is True. + must_compute : str, optional + Indicates the computation strategy. Must be either 'all_or_none' or 'ignore_failures'. Default is 'all_or_none'. Example: -------- @@ -133,7 +135,7 @@ class Detect: """ DEFAULT_CONFIG = {'hallucination': {'detector_name': 'default'}} - def __init__(self, values_returned, api_key=None, config=None, async_mode=False, publish=False, application_name=None, model_name=None): + def __init__(self, values_returned, api_key=None, config=None, async_mode=False, publish=False, application_name=None, model_name=None, must_compute='all_or_none'): """ :param values_returned: A list of values in the order returned by the decorated function Acceptable values are 'generated_text', 'context', 'user_query', 'instructions' @@ -144,6 +146,7 @@ def __init__(self, values_returned, api_key=None, config=None, async_mode=False, :param publish: Boolean, if True, the payload will be published to AIMon and can be viewed on the AIMon UI. Default is False. :param application_name: The name of the application to use when publish is True :param model_name: The name of the model to use when publish is True + :param must_compute: String, indicates the computation strategy. Must be either 'all_or_none' or 'ignore_failures'. Default is 'all_or_none'. """ api_key = os.getenv('AIMON_API_KEY') if not api_key else api_key if api_key is None: @@ -163,8 +166,15 @@ def __init__(self, values_returned, api_key=None, config=None, async_mode=False, if model_name is None: raise ValueError("Model name must be provided if publish is True") + # Validate must_compute parameter + if not isinstance(must_compute, str): + raise ValueError("`must_compute` must be a string value") + if must_compute not in ['all_or_none', 'ignore_failures']: + raise ValueError("`must_compute` must be either 'all_or_none' or 'ignore_failures'") + self.must_compute = must_compute + self.application_name = application_name - self.model_name = model_name + self.model_name = model_name def __call__(self, func): @wraps(func) @@ -181,6 +191,7 @@ def wrapper(*args, **kwargs): aimon_payload['config'] = self.config aimon_payload['publish'] = self.publish aimon_payload['async_mode'] = self.async_mode + aimon_payload['must_compute'] = self.must_compute # Include application_name and model_name if publishing if self.publish: diff --git a/aimon/types/inference_detect_params.py b/aimon/types/inference_detect_params.py index fe98ab0..c2e36e3 100644 --- a/aimon/types/inference_detect_params.py +++ b/aimon/types/inference_detect_params.py @@ -16,6 +16,7 @@ "BodyConfigHallucination", "BodyConfigHallucinationV0_2", "BodyConfigInstructionAdherence", + "BodyConfigPii", "BodyConfigToxicity", ] @@ -48,6 +49,10 @@ class BodyConfigToxicity(TypedDict, total=False): detector_name: Literal["default"] +class BodyConfigPii(TypedDict, total=False): + detector_name: Literal["default"] + + class BodyConfig(TypedDict, total=False): completeness: BodyConfigCompleteness @@ -61,6 +66,8 @@ class BodyConfig(TypedDict, total=False): toxicity: BodyConfigToxicity + pii: BodyConfigPii + class Body(TypedDict, total=False): context: Required[Union[List[str], str]] @@ -81,6 +88,9 @@ class Body(TypedDict, total=False): model_name: str """The model name for publishing metrics for an application.""" + must_compute: str + """Indicates the computation strategy. Must be either 'all_or_none' or 'ignore_failures'.""" + publish: bool """Indicates whether to publish metrics.""" diff --git a/tests/test_detect.py b/tests/test_detect.py index 571902b..a7e9da4 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -825,3 +825,77 @@ def test_evaluate_with_new_model(self): import os if os.path.exists(dataset_path): os.remove(dataset_path) + + def test_must_compute_validation(self): + """Test that the must_compute parameter is properly validated.""" + print("\n=== Testing must_compute validation ===") + + # Test config with both hallucination and PII + test_config = { + "hallucination": { + "detector_name": "default" + }, + "pii": { + "detector_name": "default" + } + } + print(f"Test Config: {test_config}") + + # Test valid values + valid_values = ['all_or_none', 'ignore_failures'] + print(f"Testing valid must_compute values: {valid_values}") + + for value in valid_values: + print(f"Testing valid must_compute value: {value}") + detect = Detect( + values_returned=["context", "generated_text"], + api_key=self.api_key, + config=test_config, + must_compute=value + ) + assert detect.must_compute == value + print(f"✅ Successfully validated must_compute value: {value}") + + # Test invalid string value + invalid_string_value = "invalid_value" + print(f"Testing invalid must_compute string value: {invalid_string_value}") + try: + Detect( + values_returned=["context", "generated_text"], + api_key=self.api_key, + config=test_config, + must_compute=invalid_string_value + ) + print("❌ ERROR: Expected ValueError but none was raised - This should not happen") + assert False, "Expected ValueError for invalid string value" + except ValueError as e: + print(f"✅ Successfully caught ValueError for invalid string: {str(e)}") + assert "`must_compute` must be either 'all_or_none' or 'ignore_failures'" in str(e) + + # Test non-string value + non_string_value = 123 + print(f"Testing non-string must_compute value: {non_string_value}") + try: + Detect( + values_returned=["context", "generated_text"], + api_key=self.api_key, + config=test_config, + must_compute=non_string_value + ) + print("❌ ERROR: Expected ValueError but none was raised - This should not happen") + assert False, "Expected ValueError for non-string value" + except ValueError as e: + print(f"✅ Successfully caught ValueError for non-string: {str(e)}") + assert "`must_compute` must be a string value" in str(e) + + # Test default value + print("Testing default must_compute value: default") + detect_default = Detect( + values_returned=["context", "generated_text"], + api_key=self.api_key, + config=test_config + ) + assert detect_default.must_compute == 'all_or_none' + print(f"✅ Successfully validated default must_compute value: {detect_default.must_compute}") + + print("🎉 Result: must_compute validation working correctly") From a21f7ef4c10c81e24bbe3ac86cf79f154fea4a7e Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Fri, 27 Jun 2025 12:50:36 -0400 Subject: [PATCH 11/12] Replaced PII with completeness --- aimon/types/inference_detect_params.py | 9 --------- tests/test_detect.py | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/aimon/types/inference_detect_params.py b/aimon/types/inference_detect_params.py index c2e36e3..be05eb5 100644 --- a/aimon/types/inference_detect_params.py +++ b/aimon/types/inference_detect_params.py @@ -16,7 +16,6 @@ "BodyConfigHallucination", "BodyConfigHallucinationV0_2", "BodyConfigInstructionAdherence", - "BodyConfigPii", "BodyConfigToxicity", ] @@ -48,11 +47,6 @@ class BodyConfigInstructionAdherence(TypedDict, total=False): class BodyConfigToxicity(TypedDict, total=False): detector_name: Literal["default"] - -class BodyConfigPii(TypedDict, total=False): - detector_name: Literal["default"] - - class BodyConfig(TypedDict, total=False): completeness: BodyConfigCompleteness @@ -66,9 +60,6 @@ class BodyConfig(TypedDict, total=False): toxicity: BodyConfigToxicity - pii: BodyConfigPii - - class Body(TypedDict, total=False): context: Required[Union[List[str], str]] """Context as an array of strings or a single string""" diff --git a/tests/test_detect.py b/tests/test_detect.py index 34ac9ad..96ae52f 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -829,12 +829,12 @@ def test_must_compute_validation(self): """Test that the must_compute parameter is properly validated.""" print("\n=== Testing must_compute validation ===") - # Test config with both hallucination and PII + # Test config with both hallucination and completeness test_config = { "hallucination": { "detector_name": "default" }, - "pii": { + "completeness": { "detector_name": "default" } } From f371573468961052d88b4d57bdbefdcd6ed7e56e Mon Sep 17 00:00:00 2001 From: Devvrat Bhardwaj Date: Fri, 27 Jun 2025 13:04:54 -0400 Subject: [PATCH 12/12] Added must compute tests with the actual service --- tests/test_detect.py | 81 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/test_detect.py b/tests/test_detect.py index 96ae52f..8fc8dfc 100644 --- a/tests/test_detect.py +++ b/tests/test_detect.py @@ -898,3 +898,84 @@ def test_must_compute_validation(self): print(f"✅ Successfully validated default must_compute value: {detect_default.must_compute}") print("🎉 Result: must_compute validation working correctly") + + def test_must_compute_with_actual_service(self): + """Test must_compute functionality with actual service calls.""" + print("\n=== Testing must_compute with actual service ===") + + # Test config with both hallucination and completeness + test_config = { + "hallucination": { + "detector_name": "default" + }, + "completeness": { + "detector_name": "default" + } + } + print(f"Test Config: {test_config}") + + # Test both must_compute values + for must_compute_value in ['all_or_none', 'ignore_failures']: + print(f"\n--- Testing must_compute: {must_compute_value} ---") + + detect = Detect( + values_returned=["context", "generated_text", "user_query"], + api_key=self.api_key, + config=test_config, + must_compute=must_compute_value + ) + + @detect + def generate_summary(context, query): + generated_text = f"Summary of {context} based on query: {query}" + return context, generated_text, query + + # Test data + context = "Machine learning is a subset of artificial intelligence that enables computers to learn without being explicitly programmed." + query = "What is machine learning?" + + print(f"Input Context: {context}") + print(f"Input Query: {query}") + print(f"Must Compute: {must_compute_value}") + + try: + # Call the decorated function + context_ret, generated_text, query_ret, result = generate_summary(context, query) + + print(f"✅ API Call Successful!") + print(f"Status Code: {result.status}") + print(f"Generated Text: {generated_text}") + + # Display response details + if hasattr(result.detect_response, 'hallucination'): + hallucination = result.detect_response.hallucination + print(f"Hallucination Score: {hallucination.get('score', 'N/A')}") + print(f"Is Hallucinated: {hallucination.get('is_hallucinated', 'N/A')}") + + if hasattr(result.detect_response, 'completeness'): + completeness = result.detect_response.completeness + print(f"Completeness Score: {completeness.get('score', 'N/A')}") + + # Show the full response structure + print(f"Response Object Type: {type(result.detect_response)}") + if hasattr(result.detect_response, '__dict__'): + print(f"Response Attributes: {list(result.detect_response.__dict__.keys())}") + + except Exception as e: + error_message = str(e) + print(f"API Call Result: {error_message}") + print(f"Error Type: {type(e).__name__}") + + # For all_or_none, 503 is expected when services are unavailable + if must_compute_value == 'all_or_none' and '503' in error_message: + print("✅ Expected behavior: all_or_none returns 503 when services unavailable") + # For ignore_failures, we expect success or different error handling + elif must_compute_value == 'ignore_failures': + if '503' in error_message: + print("❌ Unexpected: ignore_failures should handle service unavailability") + else: + print("✅ Expected behavior: ignore_failures handled the error appropriately") + else: + print(f"❌ Unexpected error for {must_compute_value}: {error_message}") + + print("\n🎉 All must_compute service tests completed!")