feat: Add Jina.ai API

inf-monkeys · May 24, 2024 · 566965e · 566965e
1 parent 535b23e
commit 566965e
Show file tree

Hide file tree

Showing 11 changed files with 552 additions and 390 deletions.
diff --git a/.github/workflows/publish-docker-image.yml b/.github/workflows/publish-docker-image.yml
@@ -22,7 +22,7 @@ jobs:
         id: meta
         uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
         with:
-          images: infmonkeys/monkey-tools-tavily-ai
+          images: infmonkeys/monkey-tools-internet
 
       - name: Build and push Docker image
         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671

diff --git a/.github/workflows/publish-huawei-docker-image.yml b/.github/workflows/publish-huawei-docker-image.yml
diff --git a/Dockerfile b/Dockerfile
@@ -11,7 +11,7 @@ ADD . /app
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Make port 5000 available to the world outside this container
-EXPOSE 8890
+EXPOSE 5000
 
 # Run main.py when the container launches
-CMD ["python", "main.py"]
+CMD ["python", "main.py"]
diff --git a/README.md b/README.md
@@ -1,11 +1,21 @@
-# ✨ Monkey Tools for Tavily AI
+# 🛜 Monkey Tools for Scraping the Web
+
+> Based on open-source tools and online services.
+
+## Available Tools
+
+- [Jina.ai Reader](https://jina.ai/reader/#apiform)
+- [Tavily AI](https://app.tavily.com/)
 
 ## Configuration
 
 ```yaml
 server:
-  port: 8890
+  port: 5000
 
 tavily:
   apikey: 
+
+jinaai:
+  apikey:
 ```
diff --git a/main.py b/main.py
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,5 @@
 tavily-python
 flask
 flask_restx
-pyyaml
+pyyaml
+requests
diff --git a/src/config.py b/src/config.py
@@ -0,0 +1,9 @@
+import yaml
+
+
+def load_config(filename):
+    """Load a YAML configuration file and return its parsed contents.
+
+    Args:
+        filename: Path of the YAML file to read.
+
+    Returns:
+        The parsed document. An empty (or comment-only) file yields ``{}``
+        instead of ``None`` so callers can chain ``.get(...)`` safely.
+    """
+    # Pin the encoding: the platform default is not UTF-8 everywhere.
+    with open(filename, "r", encoding="utf-8") as file:
+        config = yaml.safe_load(file)
+    return config if config is not None else {}
+
+
+# Parsed once at import time; the path is relative to the working directory.
+config_data = load_config("config.yaml")
diff --git a/src/server/app.py b/src/server/app.py
@@ -0,0 +1,49 @@
+from flask import Flask, request
+from flask_restx import Api
+import logging
+
+# Init Flask app
+app = Flask(__name__)
+# Flask-RESTX Api bound to the app; version, title and description are the
+# metadata passed to it here.
+api = Api(
+    app,
+    version="1.0",
+    title="Monkey Tools for The Internet",
+    description="Monkey Tools for The Internet",
+)
+
+
+@app.before_request
+def before_request():
+    """Copy the x-monkeys-* request headers onto the request object.
+
+    A header that is absent leaves the matching attribute set to ``None``.
+    """
+    # (attribute set on ``request``, header it is read from)
+    header_by_attribute = (
+        ("app_id", "x-monkeys-appid"),
+        ("user_id", "x-monkeys-userid"),
+        ("team_id", "x-monkeys-teamid"),
+        ("workflow_id", "x-monkeys-workflowid"),
+        ("workflow_instance_id", "x-monkeys-workflow-instanceid"),
+    )
+    for attribute, header in header_by_attribute:
+        setattr(request, attribute, request.headers.get(header))
+
+
+@api.errorhandler(Exception)
+def handle_exception(error):
+    """Return the error text as a ``message`` body with HTTP status 500."""
+    body = {"message": str(error)}
+    return body, 500
+
+
+@app.get("/manifest.json")
+def get_manifest():
+    """Return the static manifest that describes this tool service."""
+    manifest = dict(
+        schema_version="v1",
+        display_name="Monkey Tools for The Internet",
+        namespace="monkey_tools_internet",
+        auth=dict(type="none"),
+        api=dict(type="openapi", url="/swagger.json"),
+        contact_email="dev@inf-monkeys.com",
+    )
+    return manifest
+
+class NoSuccessfulRequestLoggingFilter(logging.Filter):
+    """Logging filter that drops every record whose message contains "GET /".
+
+    The response status is not inspected: any record mentioning "GET /" is
+    suppressed, whatever its outcome.
+    """
+
+    def filter(self, record):
+        """Return False for records mentioning "GET /", True otherwise."""
+        message = record.getMessage()
+        is_get_request = "GET /" in message
+        return not is_get_request
+
+
+# Get Flask's default logger (the "werkzeug" logger)
+log = logging.getLogger("werkzeug")
+# Create the filter and attach it
+log.addFilter(NoSuccessfulRequestLoggingFilter())
+
diff --git a/src/services/__init__.py b/src/services/__init__.py
@@ -0,0 +1,2 @@
+from .import jinaai_api
+from .import tavily_api
diff --git a/src/services/jinaai_api.py b/src/services/jinaai_api.py
@@ -0,0 +1,181 @@
+from flask import request
+import requests
+from flask_restx import Resource
+from src.server.app import api
+from src.config import config_data
+
+# Flask-RESTX namespace on which the Jina.ai resources below are registered.
+jinaai_ns = api.namespace("jinaai", description="Jina.ai API")
+
+
+@jinaai_ns.route("/reader")
+class WeatherLookUpResource(Resource):
+    """Proxy resource for the Jina.ai search (s.jina.ai) and reader (r.jina.ai) APIs.
+
+    NOTE(review): the class name does not match what the resource does; it is
+    kept unchanged so that nothing derived from the class name changes.
+    """
+
+    # Seconds to wait for Jina.ai before giving up; without a timeout a
+    # stalled upstream connection would block the worker indefinitely.
+    REQUEST_TIMEOUT = 60
+
+    @jinaai_ns.doc("reader")
+    @jinaai_ns.vendor(
+        {
+            "x-monkey-tool-name": "jinaai_reader",
+            "x-monkey-tool-categories": ["query"],
+            "x-monkey-tool-display-name": "Jina.ai Reader",
+            "x-monkey-tool-description": "Read and search the web using Jina.ai",
+            "x-monkey-tool-icon": "emoji:🌐:#ceefc5",
+            "x-monkey-tool-input": [
+                {
+                    "displayName": "Mode",
+                    "name": "mode",
+                    "type": "options",
+                    "default": "search",
+                    "options": [
+                        {
+                            "name": "search",
+                            "value": "search",
+                        },
+                        {
+                            "name": "read",
+                            "value": "read",
+                        },
+                    ],
+                },
+                {
+                    "displayName": "Input",
+                    "name": "input",
+                    "type": "string",
+                    "required": True,
+                    "description": "Any urls or questions you want to search or read on the web.",
+                },
+                {
+                    "displayName": "JSON Response",
+                    "name": "enable_json_response",
+                    "type": "boolean",
+                    "required": False,
+                    "default": False,
+                },
+                {
+                    "displayName": "Image Caption",
+                    "name": "enable_image_caption",
+                    "type": "boolean",
+                    "required": False,
+                    "default": False,
+                    "description": "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing.",
+                },
+                {
+                    "displayName": "Gather All Links At the End",
+                    "name": "gather_all_links_at_the_end",
+                    "type": "boolean",
+                    "required": False,
+                    "default": False,
+                    "description": 'A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.',
+                },
+                {
+                    "displayName": "Gather All Images At the End",
+                    "name": "gather_all_images_at_the_end",
+                    "type": "boolean",
+                    "required": False,
+                    "default": False,
+                    "description": 'An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.',
+                },
+            ],
+            "x-monkey-tool-output": [
+                {
+                    "displayName": "Json Result for Search",
+                    "name": "json_result_for_search",
+                    "type": "array",
+                    "properties": [
+                        {
+                            "displayName": "Url",
+                            "name": "url",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Title",
+                            "name": "title",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Content",
+                            "name": "content",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Description",
+                            "name": "description",
+                            "type": "string",
+                        },
+                    ],
+                },
+                {
+                    "displayName": "Json Result for Read",
+                    "name": "json_result_for_read",
+                    "type": "json",
+                    "properties": [
+                        {
+                            "displayName": "Url",
+                            "name": "url",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Title",
+                            "name": "title",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Content",
+                            "name": "content",
+                            "type": "string",
+                        },
+                        {
+                            "displayName": "Description",
+                            "name": "description",
+                            "type": "string",
+                        },
+                    ],
+                },
+                {
+                    "displayName": "Markdown Result",
+                    "name": "markdown_result",
+                    "type": "string",
+                },
+            ],
+            "x-monkey-tool-extra": {
+                "estimateTime": 5,
+            },
+        }
+    )
+    def post(self):
+        """Run a Jina.ai search or read request and return its result.
+
+        Reads ``mode``, ``input`` and the boolean option flags from the JSON
+        request body, forwards them to Jina.ai as URL and headers, and wraps
+        the response under the output key matching the request.
+
+        Returns:
+            ``{"markdown_result": <text>}`` by default. When
+            ``enable_json_response`` is set, ``{"json_result_for_read": ...}``
+            for ``mode == "read"`` and ``{"json_result_for_search": ...}``
+            for every other mode.
+
+        Raises:
+            Exception: If ``input`` is missing or Jina.ai reports a failure.
+        """
+        # Local import keeps this block self-contained.
+        from urllib.parse import quote
+
+        # A missing or non-object JSON body is treated as empty, so the
+        # caller gets "Input is required" instead of an AttributeError.
+        payload = request.get_json(silent=True)
+        if not isinstance(payload, dict):
+            payload = {}
+
+        mode = payload.get("mode", "search")
+        query = payload.get("input")
+        if not query:
+            raise Exception("Input is required")
+        enable_json_response = payload.get("enable_json_response", False)
+
+        # (flag in the request body, Jina.ai header it enables, header value)
+        header_flags = (
+            ("enable_json_response", "Accept", "application/json"),
+            ("enable_image_caption", "X-With-Generated-Alt", "true"),
+            ("gather_all_links_at_the_end", "X-With-Links-Summary", "true"),
+            ("gather_all_images_at_the_end", "X-With-Images-Summary", "true"),
+        )
+        headers = {
+            header: value
+            for flag, header, value in header_flags
+            if payload.get(flag, False)
+        }
+
+        # ``jinaai:`` may be present but empty in the YAML, which parses to None.
+        apikey = (config_data.get("jinaai") or {}).get("apikey")
+        if apikey:
+            headers["Authorization"] = f"Bearer {apikey}"
+
+        if mode == "search":
+            # Percent-encode the query so "?", "#" and "/" stay part of the
+            # search text instead of being parsed as URL structure.
+            encoded_query = quote(str(query), safe="")
+            url = f"https://s.jina.ai/{encoded_query}"
+        else:
+            # The reader endpoint takes the target URL appended as-is.
+            url = f"https://r.jina.ai/{query}"
+
+        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
+
+        if not enable_json_response:
+            # Do not hand an upstream error page back as a successful result.
+            if not response.ok:
+                raise Exception(
+                    f"Jina.ai request failed with status {response.status_code}: {response.text}"
+                )
+            return {"markdown_result": response.text}
+
+        result = response.json()
+        code = result.get("code", 200)
+        if code != 200:
+            raise Exception(
+                result.get("readableMessage")
+                or f"Jina.ai request failed with code {code}"
+            )
+        data = result.get("data", [])
+        if mode == "read":
+            return {"json_result_for_read": data}
+        return {"json_result_for_search": data}