feat: add image vision provider

This commit is contained in:
zhayujie
2026-03-09 11:37:45 +08:00
parent 6be2034110
commit 3b8b5625f8
7 changed files with 81 additions and 19 deletions

View File

@@ -123,13 +123,18 @@ def should_include_skill(
return False
# Check environment variables (API keys)
# Simple rule: All required env vars must be set
# All required env vars must be set
required_env = metadata.requires.get('env', [])
if required_env:
for env_name in required_env:
if not has_env_var(env_name):
# Missing required API key → disable skill
return False
# Check anyEnv (at least one must be present)
any_env = metadata.requires.get('anyEnv', [])
if any_env:
if not any(has_env_var(e) for e in any_env):
return False
return True

View File

@@ -32,6 +32,7 @@ def format_skills_for_prompt(skills: List[Skill]) -> str:
lines.append(f" <name>{_escape_xml(skill.name)}</name>")
lines.append(f" <description>{_escape_xml(skill.description)}</description>")
lines.append(f" <location>{_escape_xml(skill.file_path)}</location>")
lines.append(f" <base_dir>{_escape_xml(skill.base_dir)}</base_dir>")
lines.append(" </skill>")
lines.append("</available_skills>")

View File

@@ -355,6 +355,37 @@ def load_config():
logger.info("[INIT] Debug: {}".format(config.get("debug", False)))
logger.info("[INIT] ========================================")
# Sync selected config values to environment variables so that
# subprocesses (e.g. shell skill scripts) can access them directly.
# Existing env vars are NOT overwritten (env takes precedence).
_CONFIG_TO_ENV = {
"open_ai_api_key": "OPENAI_API_KEY",
"open_ai_api_base": "OPENAI_API_BASE",
"linkai_api_key": "LINKAI_API_KEY",
"linkai_api_base": "LINKAI_API_BASE",
"claude_api_key": "CLAUDE_API_KEY",
"claude_api_base": "CLAUDE_API_BASE",
"gemini_api_key": "GEMINI_API_KEY",
"gemini_api_base": "GEMINI_API_BASE",
"minimax_api_key": "MINIMAX_API_KEY",
"minimax_api_base": "MINIMAX_API_BASE",
"zhipu_ai_api_key": "ZHIPU_AI_API_KEY",
"zhipu_ai_api_base": "ZHIPU_AI_API_BASE",
"moonshot_api_key": "MOONSHOT_API_KEY",
"moonshot_api_base": "MOONSHOT_API_BASE",
"ark_api_key": "ARK_API_KEY",
"ark_api_base": "ARK_API_BASE",
}
injected = 0
for conf_key, env_key in _CONFIG_TO_ENV.items():
if env_key not in os.environ:
val = config.get(conf_key, "")
if val:
os.environ[env_key] = str(val)
injected += 1
if injected:
logger.info("[INIT] Synced {} config values to environment variables".format(injected))
config.load_user_datas()

View File

@@ -72,8 +72,12 @@ bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4o-mini"
| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `OPENAI_API_KEY` | Yes | - | Your OpenAI API key |
| `OPENAI_API_BASE` | No | `https://api.openai.com/v1` | Custom API base URL |
| `OPENAI_API_KEY` | No* | - | OpenAI API key (preferred) |
| `OPENAI_API_BASE` | No | `https://api.openai.com/v1` | Custom OpenAI API base URL |
| `LINKAI_API_KEY` | No* | - | LinkAI API key (fallback when OPENAI_API_KEY is not set) |
| `LINKAI_API_BASE` | No | `https://api.link-ai.tech` | LinkAI API base URL, without the `/v1` suffix (the script appends `/v1` automatically) |
\* At least one of `OPENAI_API_KEY` or `LINKAI_API_KEY` must be set. OpenAI takes priority when both are configured.
## Response Format

View File

@@ -6,8 +6,7 @@ metadata:
emoji: 👁️
requires:
bins: ["curl", "base64"]
env: ["OPENAI_API_KEY"]
primaryEnv: "OPENAI_API_KEY"
anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"]
---
# OpenAI Image Vision
@@ -16,12 +15,13 @@ Analyze images using OpenAI's GPT-4 Vision API. The model can understand visual
## Setup
This skill requires an OpenAI API key. If not configured:
This skill requires at least one of the following API keys (OpenAI is preferred when both are set):
1. Get your API key from https://platform.openai.com/api-keys
2. Set the key using: `env_config(action="set", key="OPENAI_API_KEY", value="your-key")`
1. **OpenAI** (preferred): `env_config(action="set", key="OPENAI_API_KEY", value="your-key")`
2. **LinkAI** (fallback): `env_config(action="set", key="LINKAI_API_KEY", value="your-key")`
Optional: Set custom API base URL:
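Optional: Set a custom OpenAI API base URL (default: https://api.openai.com/v1). When the LinkAI fallback is used, the base URL is taken from `LINKAI_API_BASE` instead (default: https://api.link-ai.tech):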
```bash
env_config(action="set", key="OPENAI_API_BASE", value="your-base-url")
```

View File

@@ -18,13 +18,21 @@ if [ -z "$question" ]; then
exit 1
fi
if [ -z "${OPENAI_API_KEY:-}" ]; then
echo '{"error": "OPENAI_API_KEY environment variable is not set", "help": "Visit https://platform.openai.com/api-keys to get an API key"}'
# Determine API key and base URL (prefer OpenAI, fallback to LinkAI)
api_key="${OPENAI_API_KEY:-}"
api_base="${OPENAI_API_BASE:-https://api.openai.com/v1}"
if [ -z "$api_key" ] && [ -n "${LINKAI_API_KEY:-}" ]; then
api_key="$LINKAI_API_KEY"
api_base="${LINKAI_API_BASE:-https://api.link-ai.tech}/v1"
>&2 echo "[vision.sh] Using LinkAI API (OPENAI_API_KEY not set)"
fi
if [ -z "$api_key" ]; then
echo '{"error": "No API key configured. Set OPENAI_API_KEY or LINKAI_API_KEY", "help": "Visit https://platform.openai.com/api-keys or https://link-ai.tech to get an API key"}'
exit 1
fi
# Set API base URL (default to OpenAI's official endpoint)
api_base="${OPENAI_API_BASE:-https://api.openai.com/v1}"
# Remove trailing slash if present
api_base="${api_base%/}"
@@ -175,7 +183,7 @@ fi
curl_cmd=$(command -v curl)
response=$($curl_cmd -sS --max-time 60 \
-X POST \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-H "Authorization: Bearer $api_key" \
-H "Content-Type: application/json" \
-d "$request_body" \
"$api_base/chat/completions" 2>&1)

View File

@@ -261,14 +261,15 @@ Write the YAML frontmatter with `name`, `description`, and optional `metadata`:
- Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when the agent needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks"
- `metadata`: (Optional) Specify requirements and configuration
- `requires.bins`: Required binaries (e.g., `["curl", "jq"]`)
- `requires.env`: Required environment variables (e.g., `["OPENAI_API_KEY"]`)
- `primaryEnv`: Primary environment variable name (for API keys)
- `requires.env`: Required environment variables — all must be set (e.g., `["MYAPI_KEY"]`)
- `requires.anyEnv`: Alternative environment variables — at least one must be set (e.g., `["OPENAI_API_KEY", "LINKAI_API_KEY"]`)
- `requires.anyBins`: Alternative binaries — at least one must be present
- `always`: Set to `true` to always load regardless of requirements
- `emoji`: Skill icon (optional)
**API Key Requirements**:
If your skill needs an API key, declare it in metadata:
If your skill needs a single API key, declare it in `requires.env`:
```yaml
---
@@ -278,7 +279,19 @@ metadata:
requires:
bins: ["curl"]
env: ["MYAPI_KEY"]
primaryEnv: "MYAPI_KEY"
---
```
If your skill supports multiple API key providers (e.g., OpenAI or LinkAI), use `requires.anyEnv`:
```yaml
---
name: my-vision
description: Analyze images using Vision API
metadata:
requires:
bins: ["curl"]
anyEnv: ["OPENAI_API_KEY", "LINKAI_API_KEY"]
---
```