feat(channel): add wecom_bot channel

2026-03-18 12:40:06 +08:00 · 2026-03-16 14:39:15 +08:00
parent c4b5f7fbae
commit d4480b695e
10 changed files with 995 additions and 20 deletions
--- a/agent/tools/scheduler/integration.py
+++ b/agent/tools/scheduler/integration.py
@@ -134,12 +134,13 @@ def _execute_agent_task(task: dict, agent_bridge):
        elif channel_type == "dingtalk":
            # DingTalk requires msg object, set to None for scheduled tasks
            context["msg"] = None
-            # 如果是单聊，需要传递 sender_staff_id
            if not is_group:
                sender_staff_id = action.get("dingtalk_sender_staff_id")
                if sender_staff_id:
                    context["dingtalk_sender_staff_id"] = sender_staff_id
-        
+        elif channel_type == "wecom_bot":
+            context["msg"] = None
+
        # Use Agent to execute the task
        # Mark this as a scheduled task execution to prevent recursive task creation
        context["is_scheduled_task"] = True
@@ -234,7 +235,9 @@ def _execute_send_message(task: dict, agent_bridge):
                    logger.debug(f"[Scheduler] DingTalk single chat: sender_staff_id={sender_staff_id}")
                else:
                    logger.warning(f"[Scheduler] Task {task['id']}: DingTalk single chat message missing sender_staff_id")
-        
+        elif channel_type == "wecom_bot":
+            context["msg"] = None
+
        # Create reply
        reply = Reply(ReplyType.TEXT, content)
        
@@ -327,31 +330,31 @@ def _execute_tool_call(task: dict, agent_bridge):
            context["request_id"] = request_id
            logger.debug(f"[Scheduler] Generated request_id for web channel: {request_id}")
        elif channel_type == "feishu":
-            # Feishu channel: for scheduled tasks, send as new message (no msg_id to reply to)
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None
            logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
-        
+        elif channel_type == "wecom_bot":
+            context["msg"] = None
+
        reply = Reply(ReplyType.TEXT, content)
-        
+
        # Get channel and send
        from channel.channel_factory import create_channel
-        
+
        try:
            channel = create_channel(channel_type)
            if channel:
-                # For web channel, register the request_id to session mapping
                if channel_type == "web" and hasattr(channel, 'request_to_session'):
                    channel.request_to_session[request_id] = receiver
                    logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
-                
+
                channel.send(reply, context)
                logger.info(f"[Scheduler] Task {task['id']} executed: sent tool result to {receiver}")
            else:
                logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
        except Exception as e:
            logger.error(f"[Scheduler] Failed to send tool result: {e}")
-            
+
    except Exception as e:
        logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")

@@ -409,7 +412,9 @@ def _execute_skill_call(task: dict, agent_bridge):
        elif channel_type == "feishu":
            context["receive_id_type"] = "chat_id" if is_group else "open_id"
            context["msg"] = None
-        
+        elif channel_type == "wecom_bot":
+            context["msg"] = None
+
        # Use Agent to execute the skill
        try:
            # Don't clear history - scheduler tasks use isolated session_id so they won't pollute user conversations
--- a/agent/tools/web_fetch/web_fetch.py
+++ b/agent/tools/web_fetch/web_fetch.py
@@ -36,6 +36,30 @@ PPT_SUFFIXES: Set[str] = {".ppt", ".pptx"}

 ALL_DOC_SUFFIXES = PDF_SUFFIXES | WORD_SUFFIXES | TEXT_SUFFIXES | SPREADSHEET_SUFFIXES | PPT_SUFFIXES

+_CHARSET_RE = re.compile(r'charset\s*=\s*["\']?\s*([\w\-]+)', re.IGNORECASE)
+_META_CHARSET_RE = re.compile(rb'<meta[^>]+charset\s*=\s*["\']?\s*([\w\-]+)', re.IGNORECASE)
+_META_HTTP_EQUIV_RE = re.compile(
+    rb'<meta[^>]+http-equiv\s*=\s*["\']?Content-Type["\']?[^>]+content\s*=\s*["\'][^"\']*charset=([\w\-]+)',
+    re.IGNORECASE,
+)
+
+
+def _extract_charset_from_content_type(content_type: str) -> Optional[str]:
+    """Return the charset token declared in a Content-Type header value.
+
+    ``None`` is returned when ``_CHARSET_RE`` finds no ``charset=`` parameter
+    in ``content_type``.
+    """
+    found = _CHARSET_RE.search(content_type)
+    if found is None:
+        return None
+    return found.group(1)
+
+
+def _extract_charset_from_html_meta(raw_bytes: bytes) -> Optional[str]:
+    """Return the charset declared by an HTML <meta> tag found in ``raw_bytes``.
+
+    ``_META_CHARSET_RE`` is tried first, then ``_META_HTTP_EQUIV_RE``; the
+    first match wins and its captured name is decoded to ``str``. ``None``
+    means neither pattern matched.
+    """
+    for pattern in (_META_CHARSET_RE, _META_HTTP_EQUIV_RE):
+        hit = pattern.search(raw_bytes)
+        if hit is not None:
+            return hit.group(1).decode("ascii", errors="ignore")
+    return None
+

 def _get_url_suffix(url: str) -> str:
    """Extract file extension from URL path, ignoring query params."""
@@ -114,14 +138,7 @@ class WebFetch(BaseTool):
        if self._is_binary_content_type(content_type) and not _is_document_url(url):
            return self._handle_download_by_content_type(url, response, content_type)

-        # Fix encoding: use apparent_encoding to auto-detect, but keep Windows encodings as-is
-        if response.apparent_encoding and response.apparent_encoding.lower().startswith("windows"):
-            response.encoding = response.encoding
-        else:
-            response.encoding = response.apparent_encoding
-        if not response.encoding:
-            response.encoding = "utf-8"
-
+        response.encoding = self._detect_encoding(response)
        html = response.text
        title = self._extract_title(html)
        text = self._extract_text(html)
@@ -306,6 +323,35 @@ class WebFetch(BaseTool):

        return "\n\n".join(text_parts)

+    # ---- Encoding detection ----
+
+    @staticmethod
+    def _detect_encoding(response: requests.Response) -> str:
+        """Pick the text encoding to use when decoding ``response``.
+
+        Candidates are tried in priority order and the first usable one wins:
+
+        1. the ``charset`` parameter of the Content-Type header
+        2. a ``<meta>`` charset declaration in the first 4096 bytes of the body
+        3. ``response.apparent_encoding``, when its name starts with one of
+           the trusted prefixes listed below
+        4. ``"utf-8"``
+
+        A declared charset (steps 1-2) that Python has no codec for is skipped
+        instead of returned, so a bogus label does not short-circuit the
+        remaining detection steps.
+
+        Args:
+            response: The fetched HTTP response.
+
+        Returns:
+            The chosen encoding name.
+        """
+        import codecs  # function-scope import keeps this block self-contained
+
+        def _is_known_codec(label: Optional[str]) -> bool:
+            """Return True when ``label`` names a codec Python can look up."""
+            if not label:
+                return False
+            try:
+                codecs.lookup(label)
+            except (LookupError, ValueError):
+                # LookupError: unknown codec name; ValueError covers labels
+                # that cannot even be normalized into a codec name.
+                return False
+            return True
+
+        # 1. Explicit charset in the Content-Type header
+        content_type = response.headers.get("Content-Type", "")
+        charset = _extract_charset_from_content_type(content_type)
+        if _is_known_codec(charset):
+            return charset
+
+        # 2. HTML meta charset declaration near the start of the body
+        raw = response.content[:4096]
+        charset = _extract_charset_from_html_meta(raw)
+        if _is_known_codec(charset):
+            return charset
+
+        # 3. Detector guess, accepted only for an allow-list of name prefixes
+        #    (Unicode, CJK, Windows code pages, ASCII); other guesses are ignored.
+        apparent = response.apparent_encoding
+        if apparent:
+            trusted_prefixes = ("utf", "gb", "big5", "euc", "shift_jis", "iso-2022", "windows", "ascii")
+            if apparent.lower().startswith(trusted_prefixes):
+                return apparent
+
+        # 4. Fallback
+        return "utf-8"
+
    # ---- Helper methods ----

    def _ensure_tmp_dir(self) -> str: