From 30c6d9b5ae1a38ab592d873036eb47b54558c3b4 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Tue, 17 Mar 2026 21:21:03 +0800 Subject: [PATCH] feat: support file and image upload in web console, add office docs parsing in read tool --- agent/skills/loader.py | 2 +- agent/tools/read/read.py | 120 +++++++++++++++++++++- agent/tools/web_fetch/web_fetch.py | 2 +- channel/qq/qq_channel.py | 2 +- channel/web/chat.html | 62 ++++++----- channel/web/static/css/console.css | 94 +++++++++++++++++ channel/web/static/js/console.js | 159 +++++++++++++++++++++++++++-- channel/web/web_channel.py | 103 +++++++++++++++++++ 8 files changed, 506 insertions(+), 38 deletions(-) diff --git a/agent/skills/loader.py b/agent/skills/loader.py index 210ab73..f02346d 100644 --- a/agent/skills/loader.py +++ b/agent/skills/loader.py @@ -91,7 +91,7 @@ class SkillLoader: continue # Check if this is a skill file - is_root_md = include_root_files and entry.endswith('.md') + is_root_md = include_root_files and entry.endswith('.md') and entry.upper() != 'README.MD' is_skill_md = not include_root_files and entry == 'SKILL.md' if not (is_root_md or is_skill_md): diff --git a/agent/tools/read/read.py b/agent/tools/read/read.py index a6368ea..72876c9 100644 --- a/agent/tools/read/read.py +++ b/agent/tools/read/read.py @@ -48,7 +48,8 @@ class Read(BaseTool): self.binary_extensions = {'.exe', '.dll', '.so', '.dylib', '.bin', '.dat', '.db', '.sqlite'} self.archive_extensions = {'.zip', '.tar', '.gz', '.rar', '.7z', '.bz2', '.xz'} self.pdf_extensions = {'.pdf'} - + self.office_extensions = {'.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'} + # Readable text formats (will be read with truncation) self.text_extensions = { '.txt', '.md', '.markdown', '.rst', '.log', '.csv', '.tsv', '.json', '.xml', '.yaml', '.yml', @@ -57,7 +58,6 @@ class Read(BaseTool): '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd', '.sql', '.r', '.m', '.swift', '.kt', '.scala', '.clj', '.erl', '.ex', '.dockerfile', '.makefile', '.cmake', '.gradle', '.properties', '.ini', '.conf', '.cfg', - '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx' # Office documents } def execute(self, args: Dict[str, Any]) -> ToolResult: @@ -120,7 +120,11 @@ class Read(BaseTool): # Check if PDF if file_ext in self.pdf_extensions: return self._read_pdf(absolute_path, path, offset, limit) - + + # Check if Office document (.docx, .xlsx, .pptx, etc.) + if file_ext in self.office_extensions: + return self._read_office(absolute_path, path, file_ext, offset, limit) + # Read text file (with truncation for large files) return self._read_text(absolute_path, path, offset, limit) @@ -337,6 +341,116 @@ class Read(BaseTool): except Exception as e: return ToolResult.fail(f"Error reading file: {str(e)}") + def _read_office(self, absolute_path: str, display_path: str, file_ext: str, + offset: int = None, limit: int = None) -> ToolResult: + """Read Office documents (.docx, .xlsx, .pptx) using python-docx / openpyxl / python-pptx.""" + try: + text = self._extract_office_text(absolute_path, file_ext) + except ImportError as e: + return ToolResult.fail(str(e)) + except Exception as e: + return ToolResult.fail(f"Error reading Office document: {e}") + + if not text or not text.strip(): + return ToolResult.success({ + "content": f"[Office file {Path(absolute_path).name}: no text content could be extracted]", + }) + + all_lines = text.split('\n') + total_lines = len(all_lines) + + start_line = 0 + if offset is not None: + if offset < 0: + start_line = max(0, total_lines + offset) + else: + start_line = max(0, offset - 1) + if start_line >= total_lines: + return ToolResult.fail( + f"Error: Offset {offset} is beyond end of content ({total_lines} lines total)" + ) + + selected_content = text + user_limited_lines = None + if limit is not None: + end_line = min(start_line + limit, total_lines) + selected_content = '\n'.join(all_lines[start_line:end_line]) + user_limited_lines = end_line - start_line + elif offset is not None: + selected_content = '\n'.join(all_lines[start_line:]) + + truncation = truncate_head(selected_content) + start_line_display = start_line + 1 + output_text = "" + + if truncation.truncated: + end_line_display = start_line_display + truncation.output_lines - 1 + next_offset = end_line_display + 1 + output_text = truncation.content + output_text += f"\n\n[Showing lines {start_line_display}-{end_line_display} of {total_lines}. Use offset={next_offset} to continue.]" + elif user_limited_lines is not None and start_line + user_limited_lines < total_lines: + remaining = total_lines - (start_line + user_limited_lines) + next_offset = start_line + user_limited_lines + 1 + output_text = truncation.content + output_text += f"\n\n[{remaining} more lines in file. Use offset={next_offset} to continue.]" + else: + output_text = truncation.content + + return ToolResult.success({ + "content": output_text, + "total_lines": total_lines, + "start_line": start_line_display, + "output_lines": truncation.output_lines, + }) + + @staticmethod + def _extract_office_text(absolute_path: str, file_ext: str) -> str: + """Extract plain text from an Office document.""" + if file_ext in ('.docx', '.doc'): + try: + from docx import Document + except ImportError: + raise ImportError("Error: python-docx library not installed. Install with: pip install python-docx") + doc = Document(absolute_path) + paragraphs = [p.text for p in doc.paragraphs] + for table in doc.tables: + for row in table.rows: + paragraphs.append('\t'.join(cell.text for cell in row.cells)) + return '\n'.join(paragraphs) + + if file_ext in ('.xlsx', '.xls'): + try: + from openpyxl import load_workbook + except ImportError: + raise ImportError("Error: openpyxl library not installed. Install with: pip install openpyxl") + wb = load_workbook(absolute_path, read_only=True, data_only=True) + parts = [] + for ws in wb.worksheets: + parts.append(f"--- Sheet: {ws.title} ---") + for row in ws.iter_rows(values_only=True): + parts.append('\t'.join(str(c) if c is not None else '' for c in row)) + wb.close() + return '\n'.join(parts) + + if file_ext in ('.pptx', '.ppt'): + try: + from pptx import Presentation + except ImportError: + raise ImportError("Error: python-pptx library not installed. Install with: pip install python-pptx") + prs = Presentation(absolute_path) + parts = [] + for i, slide in enumerate(prs.slides, 1): + parts.append(f"--- Slide {i} ---") + for shape in slide.shapes: + if shape.has_text_frame: + for para in shape.text_frame.paragraphs: + text = para.text.strip() + if text: + parts.append(text) + return '\n'.join(parts) + + return "" + def _read_pdf(self, absolute_path: str, display_path: str, offset: int = None, limit: int = None) -> ToolResult: """ Read PDF file content diff --git a/agent/tools/web_fetch/web_fetch.py b/agent/tools/web_fetch/web_fetch.py index 83300b0..14acfa4 100644 --- a/agent/tools/web_fetch/web_fetch.py +++ b/agent/tools/web_fetch/web_fetch.py @@ -78,7 +78,7 @@ class WebFetch(BaseTool): name: str = "web_fetch" description: str = ( - "Fetch content from a URL. For web pages, extracts readable text. " + "Fetch content from a http/https URL. For web pages, extracts readable text. " "For document files (PDF, Word, TXT, Markdown, Excel, PPT), downloads and parses the file content. " "Supported file types: .pdf, .docx, .txt, .md, .csv, .xls, .xlsx, .ppt, .pptx" ) diff --git a/channel/qq/qq_channel.py b/channel/qq/qq_channel.py index d3a1e0f..0e3094f 100644 --- a/channel/qq/qq_channel.py +++ b/channel/qq/qq_channel.py @@ -299,7 +299,7 @@ class QQChannel(ChatChannel): self._send_identify() elif op == OP_HEARTBEAT_ACK: - logger.debug("[QQ] Heartbeat ACK received") + pass elif op == OP_HEARTBEAT: self._ws_send({"op": OP_HEARTBEAT, "d": self._last_seq}) diff --git a/channel/web/chat.html b/channel/web/chat.html index 37f28ae..d2f5c89 100644 --- a/channel/web/chat.html +++ b/channel/web/chat.html @@ -267,30 +267,44 @@
-
- - - +
+ + +
+
+ + +
+ + + +
diff --git a/channel/web/static/css/console.css b/channel/web/static/css/console.css index 8d0442f..452b9fb 100644 --- a/channel/web/static/css/console.css +++ b/channel/web/static/css/console.css @@ -344,6 +344,100 @@ transition: border-color 0.2s ease; } +/* Attachment Preview Bar */ +.attachment-preview { + display: flex; + flex-wrap: wrap; + gap: 8px; + padding: 8px 0; +} +.attachment-preview.hidden { display: none; } + +.att-thumb { + position: relative; + width: 64px; height: 64px; + border-radius: 8px; + overflow: hidden; + border: 1px solid #e2e8f0; + flex-shrink: 0; +} +.dark .att-thumb { border-color: rgba(255,255,255,0.1); } +.att-thumb img { + width: 100%; height: 100%; + object-fit: cover; +} + +.att-chip { + position: relative; + display: flex; + align-items: center; + gap: 6px; + padding: 6px 28px 6px 10px; + border-radius: 8px; + background: #f1f5f9; + border: 1px solid #e2e8f0; + font-size: 12px; + color: #475569; + max-width: 180px; +} +.dark .att-chip { background: rgba(255,255,255,0.05); border-color: rgba(255,255,255,0.1); color: #94a3b8; } +.att-uploading { opacity: 0.6; pointer-events: none; } +.att-name { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.att-remove { + position: absolute; + top: -4px; right: -4px; + width: 18px; height: 18px; + border-radius: 50%; + background: #ef4444; + color: #fff; + border: none; + font-size: 12px; + line-height: 18px; + text-align: center; + cursor: pointer; + padding: 0; + opacity: 0; + transition: opacity 0.15s; +} +.att-thumb:hover .att-remove, +.att-chip:hover .att-remove { opacity: 1; } + +/* Drag-over highlight */ +.drag-over { + background: rgba(74, 190, 110, 0.08) !important; + border-color: #4ABE6E !important; +} + +/* User message attachments */ +.user-msg-attachments { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-bottom: 6px; +} +.user-msg-image { + max-width: 200px; + max-height: 160px; + border-radius: 8px; + object-fit: cover; + cursor: pointer; +} +.user-msg-image:hover { opacity: 0.9; } +.user-msg-file { + display: flex; + align-items: center; + gap: 6px; + padding: 4px 10px; + border-radius: 6px; + background: rgba(255,255,255,0.15); + font-size: 12px; +} + /* Placeholder Cards */ .placeholder-card { transition: transform 0.2s ease, box-shadow 0.2s ease; diff --git a/channel/web/static/js/console.js b/channel/web/static/js/console.js index 1616e94..daf4f27 100644 --- a/channel/web/static/js/console.js +++ b/channel/web/static/js/console.js @@ -304,6 +304,123 @@ fetch('/config').then(r => r.json()).then(data => { const chatInput = document.getElementById('chat-input'); const sendBtn = document.getElementById('send-btn'); const messagesDiv = document.getElementById('chat-messages'); +const fileInput = document.getElementById('file-input'); +const attachmentPreview = document.getElementById('attachment-preview'); + +// Pending attachments: [{file_path, file_name, file_type, preview_url}] +// Items with _uploading=true are still in flight. +let pendingAttachments = []; +let uploadingCount = 0; + +function updateSendBtnState() { + sendBtn.disabled = uploadingCount > 0 || (!chatInput.value.trim() && pendingAttachments.length === 0); +} + +function renderAttachmentPreview() { + if (pendingAttachments.length === 0) { + attachmentPreview.classList.add('hidden'); + attachmentPreview.innerHTML = ''; + updateSendBtnState(); + return; + } + attachmentPreview.classList.remove('hidden'); + attachmentPreview.innerHTML = pendingAttachments.map((att, idx) => { + if (att._uploading) { + return `
+ + ${escapeHtml(att.file_name)} +
`; + } + if (att.file_type === 'image') { + return `
+ ${escapeHtml(att.file_name)} + +
`; + } + const icon = att.file_type === 'video' ? 'fa-film' : 'fa-file-alt'; + return `
+ + ${escapeHtml(att.file_name)} + +
`; + }).join(''); + updateSendBtnState(); +} + +function removeAttachment(idx) { + if (pendingAttachments[idx]?._uploading) return; + pendingAttachments.splice(idx, 1); + renderAttachmentPreview(); +} + +async function handleFileSelect(files) { + if (!files || files.length === 0) return; + const tasks = []; + for (const file of files) { + const placeholder = { file_name: file.name, file_type: 'file', _uploading: true }; + pendingAttachments.push(placeholder); + uploadingCount++; + renderAttachmentPreview(); + + tasks.push((async () => { + const formData = new FormData(); + formData.append('file', file); + formData.append('session_id', sessionId); + try { + const resp = await fetch('/upload', { method: 'POST', body: formData }); + const data = await resp.json(); + if (data.status === 'success') { + placeholder.file_path = data.file_path; + placeholder.file_name = data.file_name; + placeholder.file_type = data.file_type; + placeholder.preview_url = data.preview_url; + delete placeholder._uploading; + } else { + const i = pendingAttachments.indexOf(placeholder); + if (i !== -1) pendingAttachments.splice(i, 1); + } + } catch (e) { + console.error('Upload failed:', e); + const i = pendingAttachments.indexOf(placeholder); + if (i !== -1) pendingAttachments.splice(i, 1); + } + uploadingCount--; + renderAttachmentPreview(); + })()); + } + await Promise.all(tasks); +} + +fileInput.addEventListener('change', function() { + handleFileSelect(this.files); + this.value = ''; +}); + +// Drag-and-drop support on chat input area +const chatInputArea = chatInput.closest('.flex-shrink-0'); +chatInputArea.addEventListener('dragover', (e) => { e.preventDefault(); e.stopPropagation(); chatInputArea.classList.add('drag-over'); }); +chatInputArea.addEventListener('dragleave', (e) => { e.preventDefault(); e.stopPropagation(); chatInputArea.classList.remove('drag-over'); }); +chatInputArea.addEventListener('drop', (e) => { + e.preventDefault(); e.stopPropagation(); + chatInputArea.classList.remove('drag-over'); + if (e.dataTransfer.files.length) handleFileSelect(e.dataTransfer.files); +}); + +// Paste image support +chatInput.addEventListener('paste', (e) => { + const items = e.clipboardData?.items; + if (!items) return; + const files = []; + for (const item of items) { + if (item.kind === 'file') { + files.push(item.getAsFile()); + } + } + if (files.length) { + e.preventDefault(); + handleFileSelect(files); + } +}); chatInput.addEventListener('compositionstart', () => { isComposing = true; }); chatInput.addEventListener('compositionend', () => { setTimeout(() => { isComposing = false; }, 100); }); @@ -314,7 +431,7 @@ chatInput.addEventListener('input', function() { const newH = Math.min(scrollH, 180); this.style.height = newH + 'px'; this.style.overflowY = scrollH > 180 ? 'auto' : 'hidden'; - sendBtn.disabled = !this.value.trim(); + updateSendBtnState(); }); chatInput.addEventListener('keydown', function(e) { @@ -346,25 +463,37 @@ document.querySelectorAll('.example-card').forEach(card => { function sendMessage() { const text = chatInput.value.trim(); - if (!text) return; + if (!text && pendingAttachments.length === 0) return; const ws = document.getElementById('welcome-screen'); if (ws) ws.remove(); const timestamp = new Date(); - addUserMessage(text, timestamp); + const attachments = [...pendingAttachments]; + addUserMessage(text, timestamp, attachments); const loadingEl = addLoadingIndicator(); chatInput.value = ''; chatInput.style.height = '42px'; chatInput.style.overflowY = 'hidden'; + pendingAttachments = []; + renderAttachmentPreview(); sendBtn.disabled = true; + const body = { session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString() }; + if (attachments.length > 0) { + body.attachments = attachments.map(a => ({ + file_path: a.file_path, + file_name: a.file_name, + file_type: a.file_type, + })); + } + fetch('/message', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ session_id: sessionId, message: text, stream: true, timestamp: timestamp.toISOString() }) + body: JSON.stringify(body) }) .then(r => r.json()) .then(data => { @@ -574,13 +703,27 @@ function startPolling() { poll(); } -function createUserMessageEl(content, timestamp) { +function createUserMessageEl(content, timestamp, attachments) { const el = document.createElement('div'); el.className = 'flex justify-end px-4 sm:px-6 py-3'; + + let attachHtml = ''; + if (attachments && attachments.length > 0) { + const items = attachments.map(a => { + if (a.file_type === 'image') { + return `${escapeHtml(a.file_name)}`; + } + const icon = a.file_type === 'video' ? 'fa-film' : 'fa-file-alt'; + return `
${escapeHtml(a.file_name)}
`; + }).join(''); + attachHtml = `
${items}
`; + } + + const textHtml = content ? renderMarkdown(content) : ''; el.innerHTML = `
- ${renderMarkdown(content)} + ${attachHtml}${textHtml}
${formatTime(timestamp)}
@@ -635,8 +778,8 @@ function createBotMessageEl(content, timestamp, requestId, toolCalls) { return el; } -function addUserMessage(content, timestamp) { - const el = createUserMessageEl(content, timestamp); +function addUserMessage(content, timestamp, attachments) { + const el = createUserMessageEl(content, timestamp, attachments); messagesDiv.appendChild(el); scrollChatToBottom(); } diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py index 6327a79..6f5dd98 100644 --- a/channel/web/web_channel.py +++ b/channel/web/web_channel.py @@ -20,6 +20,17 @@ from common.log import logger from common.singleton import singleton from config import conf +IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".svg"} +VIDEO_EXTENSIONS = {".mp4", ".webm", ".avi", ".mov", ".mkv"} + + +def _get_upload_dir() -> str: + from common.utils import expand_path + ws_root = expand_path(conf().get("agent_workspace", "~/cow")) + tmp_dir = os.path.join(ws_root, "tmp") + os.makedirs(tmp_dir, exist_ok=True) + return tmp_dir + class WebMessage(ChatMessage): def __init__( @@ -152,10 +163,53 @@ class WebChannel(ChatChannel): return on_event + def upload_file(self): + """Handle file upload via multipart/form-data. Save to workspace/tmp/ and return metadata.""" + try: + params = web.input(file={}, session_id="") + file_obj = params.get("file") + session_id = params.get("session_id", "") + if file_obj is None or not hasattr(file_obj, "filename") or not file_obj.filename: + return json.dumps({"status": "error", "message": "No file uploaded"}) + + upload_dir = _get_upload_dir() + + original_name = file_obj.filename + ext = os.path.splitext(original_name)[1].lower() + safe_name = f"web_{uuid.uuid4().hex[:8]}{ext}" + save_path = os.path.join(upload_dir, safe_name) + + with open(save_path, "wb") as f: + f.write(file_obj.read() if hasattr(file_obj, "read") else file_obj.value) + + if ext in IMAGE_EXTENSIONS: + file_type = "image" + elif ext in VIDEO_EXTENSIONS: + file_type = "video" + else: + file_type = "file" + + preview_url = f"/uploads/{safe_name}" + + logger.info(f"[WebChannel] File uploaded: {original_name} -> {save_path} ({file_type})") + + return json.dumps({ + "status": "success", + "file_path": save_path, + "file_name": original_name, + "file_type": file_type, + "preview_url": preview_url, + }, ensure_ascii=False) + + except Exception as e: + logger.error(f"[WebChannel] File upload error: {e}", exc_info=True) + return json.dumps({"status": "error", "message": str(e)}) + def post_message(self): """ Handle incoming messages from users via POST request. Returns a request_id for tracking this specific request. + Supports optional attachments (file paths from /upload). """ try: data = web.data() @@ -163,6 +217,25 @@ class WebChannel(ChatChannel): session_id = json_data.get('session_id', f'session_{int(time.time())}') prompt = json_data.get('message', '') use_sse = json_data.get('stream', True) + attachments = json_data.get('attachments', []) + + # Append file references to the prompt (same format as QQ channel) + if attachments: + file_refs = [] + for att in attachments: + ftype = att.get("file_type", "file") + fpath = att.get("file_path", "") + if not fpath: + continue + if ftype == "image": + file_refs.append(f"[图片: {fpath}]") + elif ftype == "video": + file_refs.append(f"[视频: {fpath}]") + else: + file_refs.append(f"[文件: {fpath}]") + if file_refs: + prompt = prompt + "\n" + "\n".join(file_refs) + logger.info(f"[WebChannel] Attached {len(file_refs)} file(s) to message") request_id = self._generate_request_id() self.request_to_session[request_id] = session_id @@ -300,6 +373,8 @@ class WebChannel(ChatChannel): urls = ( '/', 'RootHandler', '/message', 'MessageHandler', + '/upload', 'UploadHandler', + '/uploads/(.*)', 'UploadsHandler', '/poll', 'PollHandler', '/stream', 'StreamHandler', '/chat', 'ChatHandler', @@ -356,6 +431,34 @@ class MessageHandler: return WebChannel().post_message() +class UploadHandler: + def POST(self): + web.header('Content-Type', 'application/json; charset=utf-8') + return WebChannel().upload_file() + + +class UploadsHandler: + def GET(self, file_name): + """Serve uploaded files from workspace/tmp/ for preview.""" + try: + upload_dir = _get_upload_dir() + full_path = os.path.normpath(os.path.join(upload_dir, file_name)) + if not os.path.abspath(full_path).startswith(os.path.abspath(upload_dir)): + raise web.notfound() + if not os.path.isfile(full_path): + raise web.notfound() + content_type = mimetypes.guess_type(full_path)[0] or "application/octet-stream" + web.header('Content-Type', content_type) + web.header('Cache-Control', 'public, max-age=86400') + with open(full_path, 'rb') as f: + return f.read() + except web.HTTPError: + raise + except Exception as e: + logger.error(f"[WebChannel] Error serving upload: {e}") + raise web.notfound() + + class PollHandler: def POST(self): return WebChannel().poll_response()