
docs(examples): Enhance SDK with new examples and environment configuration #2

Merged
merged 7 commits on May 26, 2025
2 changes: 2 additions & 0 deletions .gitattributes
@@ -1 +1,3 @@
.devcontainer/** linguist-vendored=true

inference_gateway/models.py linguist-generated=true
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ dist
.coverage
node_modules/
.mypy_cache/
**/.env
113 changes: 84 additions & 29 deletions README.md
@@ -17,7 +17,9 @@
- [Error Handling](#error-handling)
- [Advanced Usage](#advanced-usage)
- [Using Tools](#using-tools)
- [Listing Available MCP Tools](#listing-available-mcp-tools)
- [Custom HTTP Configuration](#custom-http-configuration)
- [Examples](#examples)
- [License](#license)

A modern Python SDK for interacting with the [Inference Gateway](https://github.com/edenreich/inference-gateway), providing a unified interface to multiple AI providers.
@@ -41,17 +43,17 @@ pip install inference-gateway
### Basic Usage

```python
from inference_gateway import InferenceGatewayClient, Message

# Initialize client
client = InferenceGatewayClient("http://localhost:8080/v1")

# Simple chat completion
response = client.create_chat_completion(
    model="openai/gpt-4",
    messages=[
        Message(role="system", content="You are a helpful assistant"),
        Message(role="user", content="Hello!")
    ]
)

@@ -70,18 +72,18 @@ print(response.choices[0].message.content)
from inference_gateway import InferenceGatewayClient

# Basic configuration
client = InferenceGatewayClient("http://localhost:8080/v1")

# With authentication
client = InferenceGatewayClient(
    "http://localhost:8080/v1",
    token="your-api-token",
    timeout=60.0  # Custom timeout
)

# Using httpx instead of requests
client = InferenceGatewayClient(
    "http://localhost:8080/v1",
    use_httpx=True
)
```
@@ -105,13 +107,13 @@ print("OpenAI models:", openai_models)
#### Standard Completion

```python
from inference_gateway import Message

response = client.create_chat_completion(
    model="openai/gpt-4",
    messages=[
        Message(role="system", content="You are a helpful assistant"),
        Message(role="user", content="Explain quantum computing")
    ],
    max_tokens=500
)
@@ -126,7 +128,7 @@ print(response.choices[0].message.content)
for chunk in client.create_chat_completion_stream(
    model="ollama/llama2",
    messages=[
        Message(role="user", content="Tell me a story")
    ],
    use_sse=True
):
@@ -136,7 +138,7 @@ for chunk in client.create_chat_completion_stream(
for chunk in client.create_chat_completion_stream(
    model="anthropic/claude-3",
    messages=[
        Message(role="user", content="Explain AI safety")
    ],
    use_sse=False
):
@@ -186,43 +188,96 @@ except InferenceGatewayError as e:
### Using Tools

```python
# Define a weather tool using type-safe Pydantic models
from inference_gateway.models import ChatCompletionTool, FunctionObject, FunctionParameters

weather_tool = ChatCompletionTool(
    type="function",
    function=FunctionObject(
        name="get_current_weather",
        description="Get the current weather in a given location",
        parameters=FunctionParameters(
            type="object",
            properties={
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA"
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use"
                }
            },
            required=["location"]
        )
    )
)

# Using tools in a chat completion
response = client.create_chat_completion(
    model="openai/gpt-4",
    messages=[
        Message(role="system", content="You are a helpful assistant with access to weather information"),
        Message(role="user", content="What is the weather like in New York?")
    ],
    tools=[weather_tool]  # Pass the tool definition
)

print(response.choices[0].message.content)

# Check if the model made a tool call
if response.choices[0].message.tool_calls:
    for tool_call in response.choices[0].message.tool_calls:
        print(f"Tool called: {tool_call.function.name}")
        print(f"Arguments: {tool_call.function.arguments}")
```
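To complete the round trip, the tool result is typically sent back to the model as a follow-up message. The sketch below assumes an OpenAI-style protocol: that `Message` accepts `role="tool"` with a `tool_call_id` field, and that the assistant's tool-call message can be appended to the history as-is — check the SDK's generated models if these differ. The `get_current_weather` implementation is a stand-in.

```python
import json

# Hypothetical local implementation of the tool the model may call.
def get_current_weather(location: str, unit: str = "celsius") -> str:
    # A real application would query a weather API here.
    return json.dumps({"location": location, "temperature": 22, "unit": unit})

if response.choices[0].message.tool_calls:
    # Rebuild the conversation so far, including the assistant's tool-call turn.
    messages = [
        Message(role="system", content="You are a helpful assistant with access to weather information"),
        Message(role="user", content="What is the weather like in New York?"),
        response.choices[0].message,  # assumes the assistant message is accepted as-is
    ]
    for tool_call in response.choices[0].message.tool_calls:
        arguments = json.loads(tool_call.function.arguments)
        result = get_current_weather(**arguments)
        # Report each tool result back, keyed by the tool call ID.
        messages.append(
            Message(role="tool", content=result, tool_call_id=tool_call.id)
        )
    final = client.create_chat_completion(
        model="openai/gpt-4",
        messages=messages,
        tools=[weather_tool],
    )
    print(final.choices[0].message.content)
```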

### Listing Available MCP Tools

```python
# List available MCP tools (requires MCP_ENABLE and MCP_EXPOSE to be set on the gateway)
tools = client.list_tools()
print("Available tools:", tools)
```

**Server-Side Tool Management**

The SDK currently supports listing available MCP tools, which is particularly useful for UI applications that need to display connected tools to users. The key advantage is that tools are managed server-side:

- **Automatic Tool Injection**: Tools are automatically inferred and injected into requests by the Inference Gateway server
- **Simplified Client Code**: No need to manually manage or configure tools in your client application
- **Transparent Tool Calls**: During streaming chat completions with configured MCP servers, tool calls appear in the response stream - no special handling required except optionally displaying them to users

This architecture allows you to focus on LLM interactions while the gateway handles all tool management complexities behind the scenes.
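For streaming completions, this means tool calls simply show up as chunks in the response stream. Below is a minimal sketch of surfacing them to users, assuming OpenAI-style stream chunks where `choices[0].delta` carries either text content or tool-call fragments; field names may vary by SDK version.

```python
from inference_gateway import InferenceGatewayClient, Message

client = InferenceGatewayClient("http://localhost:8080/v1")

for chunk in client.create_chat_completion_stream(
    model="openai/gpt-4",
    messages=[Message(role="user", content="What's the weather in Berlin?")],
):
    delta = chunk.choices[0].delta
    # Tool calls arrive as incremental fragments alongside normal content.
    if getattr(delta, "tool_calls", None):
        for tool_call in delta.tool_calls:
            if tool_call.function and tool_call.function.name:
                print(f"\n[tool call] {tool_call.function.name}")
    elif getattr(delta, "content", None):
        print(delta.content, end="", flush=True)
```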

### Custom HTTP Configuration

```python
# With custom headers
client = InferenceGatewayClient(
    "http://localhost:8080/v1",
    headers={"X-Custom-Header": "value"}
)

# With proxy settings
client = InferenceGatewayClient(
    "http://localhost:8080/v1",
    proxies={"http": "http://proxy.example.com"}
)
```

## Examples

For comprehensive examples demonstrating various use cases, see the [examples](examples/) directory:

- [List LLMs](examples/list/) - How to list available models
- [Chat](examples/chat/) - Basic and advanced chat completion examples
- [Tools](examples/tools/) - Working with function tools
- [MCP](examples/mcp/) - Model Context Protocol integration examples

Each example includes a detailed README with setup instructions and explanations.

## License

This SDK is distributed under the MIT License; see [LICENSE](LICENSE) for more information.
38 changes: 31 additions & 7 deletions Taskfile.yml
@@ -43,7 +43,7 @@ tasks:
--output inference_gateway/models.py
--output-model-type pydantic_v2.BaseModel
--enum-field-as-literal all
--target-python-version 3.12
--use-schema-description
--use-generic-container-types
--use-standard-collections
@@ -58,7 +58,8 @@
--strict-nullable
--allow-population-by-field-name
--snake-case-field
--use-default
--use-default-kwarg
--use-title-as-name
- echo "✅ Models generated successfully"
- task: format
@@ -67,17 +68,17 @@
desc: Format code with black and isort
cmds:
- echo "Formatting code..."
- black inference_gateway/ tests/ examples/
- isort inference_gateway/ tests/ examples/
- echo "✅ Code formatted"

lint:
desc: Run all linting checks
cmds:
- echo "Running linting checks..."
- black --check inference_gateway/ tests/ examples/
- isort --check-only inference_gateway/ tests/ examples/
- mypy inference_gateway/ examples/
- echo "✅ All linting checks passed"

test:
@@ -122,6 +123,29 @@
- python -m build
- echo "✅ Package built successfully"

install-global:
desc: Build and install the package globally for testing
deps:
- build
cmds:
- echo "Installing package globally..."
- pip uninstall -y inference-gateway || true
- pip install dist/*.whl --force-reinstall
- echo "✅ Package installed globally successfully"

install-global-dev:
desc: Build and install the package globally for testing (skip tests)
deps:
- clean
- format
cmds:
- echo "Building package (skipping tests)..."
- python -m build
- echo "Installing package globally..."
- pip uninstall -y inference-gateway || true
- pip install dist/*.whl --force-reinstall
- echo "✅ Package installed globally successfully"

docs:serve:
desc: Serve documentation locally (placeholder for future docs)
cmds:
48 changes: 48 additions & 0 deletions examples/.env.example
@@ -0,0 +1,48 @@

# General settings
ENVIRONMENT=production
ENABLE_TELEMETRY=false
ENABLE_AUTH=false
# Model Context Protocol (MCP)
MCP_ENABLE=false
MCP_EXPOSE=false
MCP_SERVERS=
MCP_CLIENT_TIMEOUT=5s
MCP_DIAL_TIMEOUT=3s
MCP_TLS_HANDSHAKE_TIMEOUT=3s
MCP_RESPONSE_HEADER_TIMEOUT=3s
MCP_EXPECT_CONTINUE_TIMEOUT=1s
MCP_REQUEST_TIMEOUT=5s
# OpenID Connect
OIDC_ISSUER_URL=http://keycloak:8080/realms/inference-gateway-realm
OIDC_CLIENT_ID=inference-gateway-client
OIDC_CLIENT_SECRET=
# Server settings
SERVER_HOST=0.0.0.0
SERVER_PORT=8080
SERVER_READ_TIMEOUT=30s
SERVER_WRITE_TIMEOUT=30s
SERVER_IDLE_TIMEOUT=120s
SERVER_TLS_CERT_PATH=
SERVER_TLS_KEY_PATH=
# Client settings
CLIENT_TIMEOUT=30s
CLIENT_MAX_IDLE_CONNS=20
CLIENT_MAX_IDLE_CONNS_PER_HOST=20
CLIENT_IDLE_CONN_TIMEOUT=30s
CLIENT_TLS_MIN_VERSION=TLS12
# Providers
ANTHROPIC_API_URL=https://api.anthropic.com/v1
ANTHROPIC_API_KEY=
CLOUDFLARE_API_URL=https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai
CLOUDFLARE_API_KEY=
COHERE_API_URL=https://api.cohere.ai
COHERE_API_KEY=
GROQ_API_URL=https://api.groq.com/openai/v1
GROQ_API_KEY=
OLLAMA_API_URL=http://ollama:8080/v1
OLLAMA_API_KEY=
OPENAI_API_URL=https://api.openai.com/v1
OPENAI_API_KEY=
DEEPSEEK_API_URL=https://api.deepseek.com
DEEPSEEK_API_KEY=
26 changes: 26 additions & 0 deletions examples/README.md
@@ -0,0 +1,26 @@
# Examples

Before starting with the examples, ensure you have the Inference Gateway up and running:

1. Copy the `.env.example` file to `.env` and set your provider API key.

2. Set your preferred Large Language Model (LLM) provider and model for the examples:

```sh
export LLM_NAME=groq/meta-llama/llama-4-scout-17b-16e-instruct
```

3. Run the Docker container:

```sh
docker run --rm -it -p 8080:8080 --env-file .env -e LLM_NAME ghcr.io/inference-gateway/inference-gateway:0.7.1
```

It is recommended to set `ENVIRONMENT=development` in your `.env` file to enable debug mode.

The following examples demonstrate how to use the Inference Gateway SDK for various tasks:

- [List LLMs](list/README.md)
- [Chat](chat/README.md)
- [Tools](tools/README.md)
- [MCP](mcp/README.md)
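
Once the container is up, a quick smoke test from Python (a sketch — substitute the model you exported as `LLM_NAME`):

```python
from inference_gateway import InferenceGatewayClient, Message

client = InferenceGatewayClient("http://localhost:8080/v1")

response = client.create_chat_completion(
    model="groq/meta-llama/llama-4-scout-17b-16e-instruct",  # match your LLM_NAME
    messages=[Message(role="user", content="Say hello in one short sentence.")],
)
print(response.choices[0].message.content)
```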
1 change: 1 addition & 0 deletions examples/__init__.py
@@ -0,0 +1 @@
# Examples package