From 2950570561aa40e79d6bf8c80d523f12e453ee53 Mon Sep 17 00:00:00 2001 From: digua Date: Sun, 12 Apr 2026 15:49:06 +0800 Subject: [PATCH] =?UTF-8?q?docs:=20=E6=96=B0=E5=A2=9E=E5=AE=98=E7=BD=91?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E5=B9=B6=E6=94=AF=E6=8C=81=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=8C=96=E5=90=8C=E6=AD=A5=E5=92=8C=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/sync-docs.yml | 50 ++++ docs/cn/dev/design-philosophy.md | 28 +++ docs/cn/index.md | 8 + docs/cn/intro.md | 32 +++ docs/cn/other/community.md | 21 ++ docs/cn/quick-start.md | 7 + docs/cn/standard/ai-converter.md | 88 +++++++ docs/cn/standard/chatlab-format.md | 391 +++++++++++++++++++++++++++++ docs/cn/usage/faq.md | 23 ++ docs/cn/usage/how-to-config-ai.md | 31 +++ docs/cn/usage/how-to-export.md | 5 + docs/cn/usage/how-to-import.md | 18 ++ docs/cn/usage/index.md | 5 + docs/cn/usage/qa.md | 50 ++++ docs/cn/usage/troubleshooting.md | 81 ++++++ docs/en/chatlab-format.md | 5 + docs/en/index.md | 8 + docs/en/intro.md | 32 +++ docs/en/other/community.md | 15 ++ docs/en/quick-start.md | 29 +++ docs/en/standard/ai-converter.md | 88 +++++++ docs/en/standard/chatlab-format.md | 314 +++++++++++++++++++++++ docs/en/usage/how-to-config-ai.md | 31 +++ docs/en/usage/how-to-export.md | 100 ++++++++ docs/en/usage/how-to-import.md | 18 ++ docs/en/usage/index.md | 5 + docs/en/usage/troubleshooting.md | 81 ++++++ docs/env.d.ts | 4 + 28 files changed, 1568 insertions(+) create mode 100644 .github/workflows/sync-docs.yml create mode 100644 docs/cn/dev/design-philosophy.md create mode 100644 docs/cn/index.md create mode 100644 docs/cn/intro.md create mode 100644 docs/cn/other/community.md create mode 100644 docs/cn/quick-start.md create mode 100644 docs/cn/standard/ai-converter.md create mode 100644 docs/cn/standard/chatlab-format.md create mode 100644 docs/cn/usage/faq.md create mode 100644 docs/cn/usage/how-to-config-ai.md create mode 100644 
docs/cn/usage/how-to-export.md create mode 100644 docs/cn/usage/how-to-import.md create mode 100644 docs/cn/usage/index.md create mode 100644 docs/cn/usage/qa.md create mode 100644 docs/cn/usage/troubleshooting.md create mode 100644 docs/en/chatlab-format.md create mode 100644 docs/en/index.md create mode 100644 docs/en/intro.md create mode 100644 docs/en/other/community.md create mode 100644 docs/en/quick-start.md create mode 100644 docs/en/standard/ai-converter.md create mode 100644 docs/en/standard/chatlab-format.md create mode 100644 docs/en/usage/how-to-config-ai.md create mode 100644 docs/en/usage/how-to-export.md create mode 100644 docs/en/usage/how-to-import.md create mode 100644 docs/en/usage/index.md create mode 100644 docs/en/usage/troubleshooting.md create mode 100644 docs/env.d.ts diff --git a/.github/workflows/sync-docs.yml b/.github/workflows/sync-docs.yml new file mode 100644 index 00000000..e14de915 --- /dev/null +++ b/.github/workflows/sync-docs.yml @@ -0,0 +1,50 @@ +name: Sync docs to chatlab.fun + +on: + push: + branches: [main] + paths: + - 'docs/**' + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - name: Checkout ChatLab + uses: actions/checkout@v4 + with: + sparse-checkout: docs + + - name: Checkout chatlab.fun + uses: actions/checkout@v4 + with: + repository: hellodigua/chatlab.fun + token: ${{ secrets.CHATLAB_FUN_TOKEN }} + path: chatlab-fun + + - name: Sync docs + run: | + rsync -av --delete \ + --exclude='.vitepress' \ + --exclude='public' \ + docs/ chatlab-fun/docs/ + + - name: Commit and push + id: push + working-directory: chatlab-fun + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add docs/ + if git diff --cached --quiet; then + echo "No changes to commit" + echo "pushed=false" >> $GITHUB_OUTPUT + else + git commit -m "sync: update docs from ChatLab@${GITHUB_SHA::7}" + git push + echo "pushed=true" >> $GITHUB_OUTPUT + fi + + - name: Trigger EdgeOne 
deploy + if: steps.push.outputs.pushed == 'true' + run: curl -s -X POST "${{ secrets.WEBSITE_DEPLOY_HOOK_URL }}" diff --git a/docs/cn/dev/design-philosophy.md b/docs/cn/dev/design-philosophy.md new file mode 100644 index 00000000..921b46ab --- /dev/null +++ b/docs/cn/dev/design-philosophy.md @@ -0,0 +1,28 @@ +--- +outline: deep +--- + +# 设计理念 + +ChatLab 在开发之初,就确立了以下核心设计原则,它们指导着软件的每一项功能实现与架构选择: + +### 1. 彻底的本地化架构 + +- **数据主权**:我们认为聊天记录是极其私密的个人资产,因此 ChatLab 采用本地优先架构,核心数据库始终留存在用户设备中。 +- **隐私屏障**:通过适配 Ollama 等工具支持本地 LLM 模型,确保即使在深度语义分析时,也可以实现完全的断网运行。 +- **前置脱敏**:对于必须调用云端 API 的场景,系统内置了脱敏引擎,在数据离开本地前对敏感信息进行去标识化处理。 + +### 2. 语义化与非线性探索 + +- **超越检索**:传统的关键词搜索无法还原社交语境,ChatLab 引入 AI Agent 旨在实现语义层面的理解。 +- **多维透视**:通过 AI 与 SQL 引擎的结合,用户可以从情感倾向、社交图谱、话题聚类等多个非线性维度重新审视数据。 + +### 3. 低门槛的分析体验 + +- **抹平技术鸿沟**:我们致力于将复杂的数据分析能力封装为直观的可视化操作。 +- **开箱即用**:用户无需配置复杂的编程环境或运行脚本,即可拥有专业级的数据拆解能力。 + +### 4. 开放的标准与生态 (Open Ecosystem) + +- **透明可追溯**:作为处理私密数据的工具,ChatLab 坚持代码完全开源,接受全球开发者的安全监督。 +- **数据交换规范**:我们正在建立一套通用的聊天记录交换格式,旨在打破不同社交平台间的“数据孤岛”,实现跨平台的数据兼容与迁移。 diff --git a/docs/cn/index.md b/docs/cn/index.md new file mode 100644 index 00000000..b2f63fcf --- /dev/null +++ b/docs/cn/index.md @@ -0,0 +1,8 @@ +--- +layout: page +sidebar: false +title: ChatLab — 重构你的社交记忆 +titleTemplate: false +--- + + diff --git a/docs/cn/intro.md b/docs/cn/intro.md new file mode 100644 index 00000000..b86145a7 --- /dev/null +++ b/docs/cn/intro.md @@ -0,0 +1,32 @@ +--- +outline: deep +--- + +# ChatLab 介绍 + +ChatLab 是一个免费、开源、本地化的聊天记录分析软件。 + +在数字时代,聊天记录早已不再是简单的文本文件,它是长达十年的社交关系脉络,是亲人珍贵的语音片段,更是我们外挂在数字世界的情感大脑。 + +ChatLab 的诞生,就是为了**让每个用户都能安全地分析、回顾属于自己的社交记忆。** + +## 如何使用 + +想要分析你的聊天记录,你需要完成以下两步: + +1. 导出聊天记录 +2. 
导入聊天记录 + +由于每个聊天软件的导出方式不同,具体步骤请参考 [导出聊天记录](./usage/how-to-export.md)。 + +## 下一步 + + + +遇到了导入、AI 对话错误等问题,请参考 [常见问题](./usage/faq.md) 解决。 + +如果您导出的聊天记录格式不在当前支持范围,请参考 [AI 辅助转换指南](./standard/ai-converter.md)。 + +如果您是开发者,并支持了其他聊天应用的聊天记录导出,欢迎兼容 [聊天数据交换标准化格式](./standard/chatlab-format.md)。 + +如果还有其他问题,欢迎加入社区反馈与交流:[加入社群](./other/community.md) diff --git a/docs/cn/other/community.md b/docs/cn/other/community.md new file mode 100644 index 00000000..7d24fc73 --- /dev/null +++ b/docs/cn/other/community.md @@ -0,0 +1,21 @@ +# 加入社群 + +欢迎加入社群获取最新进展、反馈问题,或通过邮箱联系开发者。 + +## 微信群 + + + +## QQ 群 + +> **注意:QQ群仅讨论 QQ 聊天记录分析相关的话题,其他聊天软件的问题不提供解答。** + +群号:**1070511173** + +## Discord + +[点击邀请链接](https://discord.gg/zWZnxNtDNz) 加入讨论组 + +## 联系开发者 + +如果有其他问题咨询讨论,请邮件联系 hello@digua.me diff --git a/docs/cn/quick-start.md b/docs/cn/quick-start.md new file mode 100644 index 00000000..9d360062 --- /dev/null +++ b/docs/cn/quick-start.md @@ -0,0 +1,7 @@ +--- +outline: deep +--- + +# 快速上手 + +待完善 diff --git a/docs/cn/standard/ai-converter.md b/docs/cn/standard/ai-converter.md new file mode 100644 index 00000000..b5063618 --- /dev/null +++ b/docs/cn/standard/ai-converter.md @@ -0,0 +1,88 @@ +--- +outline: deep +--- + +# AI 辅助转换指南 + +如果你的聊天记录格式(如 CSV, HTML, TXT 或其他数据库导出)目前不被 ChatLab 直接支持,你可以利用 AI(如 ChatGPT, Claude, DeepSeek 等)快速编写一个转换脚本,将你的数据转换为 ChatLab 标准格式。 + +## 准备工作 + +1. **下载标准规范**:点击下载 ChatLab 标准格式规范 v0.0.2 (.md) +2. **准备数据**:准备好你导出的原始聊天记录文件(如果是在线服务,建议仅提供几百条脱敏后的样本即可)。 + +## 选择目标格式 + +请根据你的数据量大小,选择合适的提示词。 + +### 场景一:中小规模数据 (推荐) + +- **目标格式**:JSON (`.json`) +- **适用场景**:记录数 < 100 万条,文件体积 < 100MB。 +- **特点**:结构清晰,兼容性最好。 + +#### 复制 JSON 转换提示词 + +```markdown +**角色设定**: +你是一个精通数据处理和脚本编写的专家。 + +**任务目标**: +请根据我提供的【ChatLab 标准格式规范】(chatlab-format.md),编写一个脚本,将我上传的【原始聊天记录】转换为符合该规范的 **JSON 格式**。 + +**执行要求**: + +1. **分析结构**:分析原始聊天记录的文本规律或数据结构。 +2. 
**字段映射**: + - 将原始字段映射到 ChatLab 标准字段(`timestamp`, `sender`, `content`, `type` 等)。 + - 如果原始数据缺少 `sender` (用户 ID),请根据 `accountName` (用户名) 自动生成一个唯一的哈希值或虚拟 ID。 + - `type` 默认为 0 (文本)。如果能从内容中识别出图片、语音等类型,请尝试映射。 +3. **脚本生成**: + - 请编写一个**完整的、可执行的脚本**(推荐 Python 或 Node.js)。 + - **输出结构**:脚本应构建一个包含 `chatlab`, `meta`, `members`, `messages` 的完整 JSON 对象,并一次性写入文件。 + - 脚本需包含必要的错误处理,并打印进度。 +4. **结果验证**: + - 请确保生成的 JSON 结构严格符合 `chatlab-format.md` 中的定义。 + +**输出**: +请直接提供代码,并简要说明如何运行该脚本。 +``` + +### 场景二:超大规模数据 + +- **目标格式**:JSONL (`.jsonl`) +- **适用场景**:记录数 > 100 万条,或文件体积巨大。 +- **特点**:流式读写,内存占用极低,不会因为数据量大而崩溃。 + +#### 复制 JSONL 转换提示词 + +```markdown +**角色设定**: +你是一个精通大数据处理和流式计算的专家。 + +**任务目标**: +请根据我提供的【ChatLab 标准格式规范】(chatlab-format.md),编写一个脚本,将我上传的【原始聊天记录】转换为符合该规范的 **JSONL (JSON Lines) 格式**。 + +**执行要求**: + +1. **分析结构**:分析原始聊天记录的文本规律。 +2. **流式处理**: + - **必须采用流式读写**(Line-by-Line)的方式,不要一次性将所有数据加载到内存中。 + - 逐行读取原始文件,逐行写入目标文件。 +3. **JSONL 结构要求**: + - **第一行**:必须写入 `_type: "header"` 行(包含 `chatlab` 和 `meta` 信息)。 + - **成员信息**:如果可能,先扫描一遍或在处理过程中收集成员信息,写入 `_type: "member"` 行。 + - **消息记录**:每一条聊天记录写入一行 `_type: "message"`。 +4. **脚本生成**: + - 请编写一个**高效的 Python 脚本**。 + - 确保处理过程内存占用恒定,适合处理 GB 级别的大文件。 + +**输出**: +请直接提供代码,并简要说明如何运行该脚本。 +``` + +## 后续步骤 + +1. **运行脚本**:在本地环境中运行 AI 生成的脚本。 +2. **检查结果**:打开生成的文件,确认格式是否正确。 +3. 
**导入 ChatLab**:将生成的文件导入 ChatLab 进行分析。 diff --git a/docs/cn/standard/chatlab-format.md b/docs/cn/standard/chatlab-format.md new file mode 100644 index 00000000..5bf6f854 --- /dev/null +++ b/docs/cn/standard/chatlab-format.md @@ -0,0 +1,391 @@ +--- +outline: deep +--- + +📥 点击此处下载本规范文档 (.md) + +# 聊天数据交换标准化格式 + +> v0.0.2 + +ChatLab 定义了一套标准的聊天记录数据交换格式,用于支持多平台数据的统一导入和分析。 + +只要将聊天记录转换为该格式,即可被 ChatLab 解析并进行分析。 + +::: warning 注意 +该格式规范目前仍处于早期制定阶段,部分字段和结构可能会在后续版本中调整。 +::: + +## 概述 + +### 支持的文件格式 + +| 格式 | 扩展名 | 适用场景 | +| --------- | -------- | ------------------------------------------------- | +| **JSON** | `.json` | 中小型记录(<100 万条),结构清晰,易于阅读 | +| **JSONL** | `.jsonl` | 超大规模记录(>100 万条),流式处理,内存占用恒定 | + +### 格式对比 + +| 特性 | JSON | JSONL | +| ------------ | ---------------------- | ----------------------- | +| 内存占用 | 需加载完整结构 | 逐行处理,恒定 (~100MB) | +| 文件大小限制 | ~1GB(取决于内存) | 无实际限制 | +| 追加写入 | - 需重写整个文件 | ✅ 直接追加行 | +| 错误恢复 | 单处错误整文件失效 | 可跳过错误行继续 | +| 可读性 | ⭐⭐⭐ 易于阅读 | ⭐⭐ 每行一条记录 | +| 推荐场景 | 中小型记录 (<100 万条) | 大型记录 (>100 万条) | + +## 快速说明 + +以下是一个**最小化**的 ChatLab 格式示例,只包含必要字段: + +```json +{ + "chatlab": { + "version": "0.0.2", + "exportedAt": 1703001600 + }, + "meta": { + "name": "我的群聊", + "platform": "qq", + "type": "group" + }, + "members": [ + { + "platformId": "123456", + "accountName": "张三" + } + ], + "messages": [ + { + "sender": "123456", + "accountName": "张三", + "timestamp": 1703001600, + "type": 0, + "content": "大家好!" 
+ } + ] +} +``` + +--- + +## JSON 格式详细说明 + +### 文件头 (chatlab) + +| 字段 | 类型 | 必填 | 说明 | +| ------------- | ------ | ---- | ---------------------------- | +| `version` | string | ✅ | 格式版本号,当前为 `"0.0.2"` | +| `exportedAt` | number | ✅ | 导出时间(秒级 Unix 时间戳) | +| `generator` | string | - | 生成工具名称 | +| `description` | string | - | 描述信息 | + +### 元信息 (meta) + +| 字段 | 类型 | 必填 | 说明 | +| ------------- | ------------- | ---- | -------------------------------------------------------- | +| `name` | string | ✅ | 群名或对话名 | +| `platform` | string | ✅ | 平台标识,如 `qq` / `wechat` / `discord` / `whatsapp` 等 | +| `type` | string | ✅ | 聊天类型:`group`(群聊)/ `private`(私聊) | +| `groupId` | string | - | 群 ID(仅群聊) | +| `groupAvatar` | string | - | 群头像(Data URL 格式) | +| `ownerId` | string | - | 所有者/导出者的 platformId | + +### 成员 (members) + +| 字段 | 类型 | 必填 | 说明 | +| --------------- | ------------ | ---- | ------------------------- | +| `platformId` | string | ✅ | 用户唯一标识 | +| `accountName` | string | ✅ | 账号名称 | +| `groupNickname` | string | - | 群昵称(仅群聊) | +| `aliases` | string[] | - | 用户自定义别名 | +| `avatar` | string | - | 用户头像(Data URL 格式) | +| `roles` | MemberRole[] | - | 成员角色(可多个) | + +#### 角色 (roles) + +成员可以拥有一个或多个角色,用于标识群主、管理员等身份: + +| 字段 | 类型 | 必填 | 说明 | +| ------ | ------ | ---- | --------------------------------------- | +| `id` | string | ✅ | 角色标识:`owner` / `admin` / 自定义 ID | +| `name` | string | - | 角色显示名称(自定义角色需要) | + +**标准角色 ID:** + +| ID | 说明 | +| ------- | ----------- | +| `owner` | 群主/创建者 | +| `admin` | 管理员 | + +**角色示例:** + +```json +// 群主 +"roles": [{ "id": "owner" }] + +// 管理员 +"roles": [{ "id": "admin" }] + +// 多角色 +"roles": [ + { "id": "owner" }, + { "id": "tech-team", "name": "技术组" }, + { "id": "vip", "name": "VIP会员" } +] +``` + +### 消息 (messages) + +| 字段 | 类型 | 必填 | 说明 | +| ------------------- | -------------- | ---- | --------------------------------- | +| `sender` | string | ✅ | 发送者的 `platformId` | +| `accountName` | string | ✅ | 发送时的账号名称 | +| `groupNickname` | string | - | 发送时的群昵称 | 
+| `timestamp` | number | ✅ | 秒级 Unix 时间戳 | +| `type` | number | ✅ | 消息类型(见下方对照表) | +| `content` | string \| null | ✅ | 消息内容(非文本消息可为 `null`) | +| `platformMessageId` | string | - | 消息的平台原始 ID | +| `replyToMessageId` | string | - | 回复的目标消息 ID | + +#### 消息 ID 与回复关系说明 + +**`platformMessageId`**(消息的平台原始 ID): + +- 存储消息在原始平台上的唯一标识(如 Discord 的 snowflake ID、QQ 的消息 ID) +- 用于在查询时关联 `replyToMessageId`,以显示被回复消息的内容 +- 如果平台不提供消息 ID,可省略此字段 + +**`replyToMessageId`**(回复的目标消息 ID): + +- 存储被回复消息的**平台原始 ID** +- 通过与其他消息的 `platformMessageId` 关联,可查询被回复消息的内容和发送者 +- 仅当消息是回复类型时才有意义 +- 如果平台不支持或数据不包含回复关系,可省略此字段 + +--- + +## 消息类型对照表 + +::: tip 提示 +若您的聊天记录中有其他特殊类型需要支持,请提交 issue 说明情况,我们会评估是否加入标准消息类型中。 +::: + +### 基础消息类型 (0-19) + +| 值 | 名称 | 说明 | +| --- | -------- | ----------- | +| 0 | TEXT | 文本消息 | +| 1 | IMAGE | 图片 | +| 2 | VOICE | 语音 | +| 3 | VIDEO | 视频 | +| 4 | FILE | 文件 | +| 5 | EMOJI | 表情包/贴纸 | +| 7 | LINK | 链接/卡片 | +| 8 | LOCATION | 位置 | + +### 交互消息类型 (20-39) + +| 值 | 名称 | 说明 | +| --- | ---------- | ---------------------- | +| 20 | RED_PACKET | 红包 | +| 21 | TRANSFER | 转账 | +| 22 | POKE | 拍一拍/戳一戳 | +| 23 | CALL | 语音/视频通话 | +| 24 | SHARE | 分享(音乐、小程序等) | +| 25 | REPLY | 引用回复 | +| 26 | FORWARD | 转发消息 | +| 27 | CONTACT | 名片消息 | + +### 系统消息类型 (80+) + +| 值 | 名称 | 说明 | +| --- | ------ | ------------------------------ | +| 80 | SYSTEM | 系统消息(入群/退群/群公告等) | +| 81 | RECALL | 撤回消息 | +| 99 | OTHER | 其他/未知 | + +## 头像格式说明 + +头像字段 `avatar` 和 `groupAvatar` 支持两种格式: + +### 1. Data URL + +嵌入式格式,图片数据直接编码在文件中,离线可用: + +``` +data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD... +``` + +支持的图片 MIME 类型: + +- `image/jpeg` - JPEG 格式(推荐,体积较小) +- `image/png` - PNG 格式 +- `image/gif` - GIF 格式 +- `image/webp` - WebP 格式 + +### 2. 
网络 URL + +外链格式,图片存储在网络服务器,体积更小但需网络访问: + +``` +https://example.com/avatars/user123.jpg +``` + +::: tip 建议 + +- 如果需要离线使用或长期存档,推荐使用 Data URL 格式 +- 导出 Data URL 时建议将头像压缩为 100×100 像素以内,以减小文件体积 +- 如果头像来自可靠的长期有效的 CDN,可使用网络 URL 以减小文件体积 + ::: + +## 完整示例 + +### 群聊示例(含可选字段) + +```json +{ + "chatlab": { + "version": "0.0.2", + "exportedAt": 1703001600, + "generator": "My Converter Tool", + "description": "2024年技术交流群聊天记录备份" + }, + "meta": { + "name": "技术交流群", + "platform": "wechat", + "type": "group", + "groupId": "38988428513", + "groupAvatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", + "ownerId": "abc123" + }, + "members": [ + { + "platformId": "abc123", + "accountName": "张三", + "groupNickname": "群主-张三", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", + "roles": [{ "id": "owner" }] + }, + { + "platformId": "def456", + "accountName": "李四", + "groupNickname": "管理员", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", + "roles": [{ "id": "admin" }] + } + ], + "messages": [ + { + "platformMessageId": "msg_001", + "sender": "abc123", + "accountName": "张三", + "groupNickname": "群主-张三", + "timestamp": 1703001600, + "type": 0, + "content": "大家好!欢迎加入技术交流群~" + }, + { + "platformMessageId": "msg_002", + "sender": "def456", + "accountName": "李四", + "groupNickname": "管理员", + "timestamp": 1703001610, + "type": 25, + "content": "收到!", + "replyToMessageId": "msg_001" + } + ] +} +``` + +### 私聊示例 + +```json +{ + "chatlab": { + "version": "0.0.2", + "exportedAt": 1703001600 + }, + "meta": { + "name": "与小明的对话", + "platform": "qq", + "type": "private" + }, + "members": [ + { + "platformId": "123456789", + "accountName": "我", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + }, + { + "platformId": "987654321", + "accountName": "小明", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + } + ], + "messages": [ + { + "sender": "123456789", + "accountName": "我", + "timestamp": 1703001600, + "type": 0, + "content": "在吗?" 
+ } + ] +} +``` + +## JSONL 流式格式 + +JSONL(JSON Lines)格式适用于**超大规模聊天记录**(>100 万条),可避免内存溢出问题。 + +### 格式特点 + +- 每行一个 JSON 对象 +- 通过 `_type` 字段区分行类型:`header` / `member` / `message` +- 内存占用恒定(约 100MB),支持 GB 级文件 +- 支持流式写入,可边导出边追加 + +### 行类型说明 + +| `_type` | 说明 | 是否必需 | +| --------- | -------------------------------- | --------------- | +| `header` | 文件头,包含 `chatlab` 和 `meta` | ✅ 必须在第一行 | +| `member` | 成员信息 | - 可选 | +| `message` | 消息记录 | ✅ 至少一条 | + +### 完整示例 + +```jsonl +{"_type":"header","chatlab":{"version":"0.0.2","exportedAt":1703001600},"meta":{"name":"技术交流群","platform":"qq","type":"group"}} +{"_type":"member","platformId":"123456","accountName":"张三","groupNickname":"群主","roles":[{"id":"owner"}]} +{"_type":"member","platformId":"789012","accountName":"李四"} +{"_type":"message","platformMessageId":"msg_001","sender":"123456","accountName":"张三","groupNickname":"群主","timestamp":1703001600,"type":0,"content":"大家好!"} +{"_type":"message","sender":"789012","accountName":"李四","timestamp":1703001610,"type":0,"content":"你好!"} +{"_type":"message","sender":"123456","accountName":"张三","groupNickname":"群主","timestamp":1703001620,"type":1,"content":"[图片]"} +``` + +### 解析规则 + +1. **第一行必须是 header**:包含 `chatlab` 版本和 `meta` 元信息 +2. **成员行在消息之前**:可选,如果省略,成员信息会从消息中自动收集 +3. **消息按时间顺序排列**:建议按 `timestamp` 升序排列 +4. **每行独立完整**:单行解析错误可跳过继续处理 +5. 
**支持注释行**:以 `#` 开头的行会被跳过(可用于添加备注) + +::: warning 注意 + +- 每行必须是**有效的 JSON**(不能跨行) +- 行之间用换行符 `\n` 分隔 + +::: + +## 版本历史 + +| 版本 | 日期 | 变更 | +| ----- | ---------- | ------------------------------------------------------------------------------ | +| 0.0.1 | 2025-12-22 | 初始版本 | +| 0.0.2 | 2026-01-09 | 新增 roles、ownerId、platformMessageId、replyToMessageId 字段;新增 JSONL 格式 | diff --git a/docs/cn/usage/faq.md b/docs/cn/usage/faq.md new file mode 100644 index 00000000..23f1ae6c --- /dev/null +++ b/docs/cn/usage/faq.md @@ -0,0 +1,23 @@ +--- +outline: deep +--- + +# 常见问题 + +这里汇总 ChatLab 使用中高频出现的问题与处理思路。 + +## 导出问题 + +待补充。 + +## 导入问题 + +待补充。 + +## AI相关问题 + +待补充。 + +## 软件异常报错问题 + +待补充。 diff --git a/docs/cn/usage/how-to-config-ai.md b/docs/cn/usage/how-to-config-ai.md new file mode 100644 index 00000000..88275ecc --- /dev/null +++ b/docs/cn/usage/how-to-config-ai.md @@ -0,0 +1,31 @@ +--- +outline: deep +--- + +# 如何配置 AI 模型 + +## 在线 AI 模型 + +这里以 deepseek 为例,其他的模型请自行搜索配置方法: + +1. 访问 [Deepseek 官网](https://www.deepseek.com/),选择 API 开放平台,注册并登录 + +![](/cn/img/ai-guide/1.png) + +2. 选择左侧充值,充值你所需要的金额(建议先充 10 块钱,足够用很久了) + +![](/cn/img/ai-guide/2.png) + +3. 选择左侧 API Keys,点击创建 API key,随便填写一个标题,然后复制 API Key + +![](/cn/img/ai-guide/3.png) + +4. 打开 ChatLab,右下角点击设置,然后「模型配置」>「添加新配置」 + +把刚才复制的 API Key 填写到 API Keys 中,点击验证确认没问题后,点击右下角添加后,即可开始使用 AI 相关功能 + +![](/cn/img/ai-guide/4.png) + +5. 在左侧的用量信息中,你可以查看你充值和消费的额度 + +![](/cn/img/ai-guide/5.png) diff --git a/docs/cn/usage/how-to-export.md b/docs/cn/usage/how-to-export.md new file mode 100644 index 00000000..dcf46719 --- /dev/null +++ b/docs/cn/usage/how-to-export.md @@ -0,0 +1,5 @@ +--- +outline: deep +--- + + diff --git a/docs/cn/usage/how-to-import.md b/docs/cn/usage/how-to-import.md new file mode 100644 index 00000000..6b3fdbc8 --- /dev/null +++ b/docs/cn/usage/how-to-import.md @@ -0,0 +1,18 @@ +--- +outline: deep +--- + +# 导入聊天记录指南 + +完成导出后,您只需在 ChatLab 的首页: + +1. 将导出的**数据文件**直接拖入上传区域。 +2. 
等待 ChatLab 解析完成即可。 + +## BUG 排查 + +如果导入失败,可以通过日志快速排查问题: + +软件左下角「设置」 > 「基础设置」 > 「日志文件」,打开该目录,该目录下有个「import」目录,就是所有导入的日志记录了。 + +如果您看不懂日志内容,可以通过 GitHub Issue 提交问题。 diff --git a/docs/cn/usage/index.md b/docs/cn/usage/index.md new file mode 100644 index 00000000..ae639aa5 --- /dev/null +++ b/docs/cn/usage/index.md @@ -0,0 +1,5 @@ +--- +outline: deep +--- + +# USAGE \ No newline at end of file diff --git a/docs/cn/usage/qa.md b/docs/cn/usage/qa.md new file mode 100644 index 00000000..130e52e6 --- /dev/null +++ b/docs/cn/usage/qa.md @@ -0,0 +1,50 @@ +# Q&A + +## 未来会支持音频、图片导入吗? + +不确定,目前的文本分析功能仍然有非常多的 TODO 需要实现,计划文本分析的功能完善之后再考虑音频和图片的分析。 + +## 如何直接访问本地数据库 + +ChatLab 使用 SQLite 存储聊天记录,你可以用任何 SQLite 客户端工具直接查看数据。 + +### 数据库位置 + +你可直接通过软件的 设置 > 存储管理 > 聊天记录数据库 > 打开,打开数据库所在文件夹。 + +| 平台 | 路径 | +| ------- | ------------------------------------------------------- | +| macOS | `~/Library/Application Support/ChatLab/data/databases/` | +| Windows | `%APPDATA%/ChatLab/data/databases/` | +| Linux | `~/.config/ChatLab/data/databases/` | + +每个聊天记录是一个独立的 `.db` 文件。 + +### 推荐工具 + +- [DB Browser for SQLite](https://sqlitebrowser.org/) - 免费开源,新手友好 +- [TablePlus](https://tableplus.com/) - 界面美观 +- [DBeaver](https://dbeaver.io/) - 功能强大 + +### 命令行访问 + +```bash +# macOS(Linux 请将路径替换为 ~/.config/ChatLab/data/databases/) +sqlite3 ~/Library/Application\ Support/ChatLab/data/databases/你的数据库.db + +# 进入 sqlite3 交互界面后的常用命令 +.tables # 查看所有表 +.schema message # 查看 message 表结构 +SELECT * FROM message LIMIT 10; # 查询消息 +``` + +### 表结构 + +- `meta` - 聊天记录元信息 +- `member` - 成员信息 +- `message` - 消息内容 +- `member_name_history` - 成员改名历史 + +### 注意事项 + +⚠️ 建议在 ChatLab **关闭时**访问数据库,避免锁冲突。 diff --git a/docs/cn/usage/troubleshooting.md b/docs/cn/usage/troubleshooting.md new file mode 100644 index 00000000..87590281 --- /dev/null +++ b/docs/cn/usage/troubleshooting.md @@ -0,0 +1,81 @@ +--- +outline: deep +--- + +# 故障排查指南 + +本文档帮助用户和开发者排查 ChatLab 使用中遇到的问题。 + +## 日志文件 + +**获取日志文件**:软件左下角的 **「设置」 > 「存储管理」 > 「日志文件」 > 打开目录** + +日志存储在 `文档/ChatLab/logs/` 目录下: + +``` +ChatLab/logs/ +├── app.log # 
主程序日志 +├── ai/ # AI 相关日志 +│ └── ai_YYYY-MM-DD_HH-mm.log +└── import/ # 导入日志 + └── import_{sessionId}_{timestamp}.log +``` + +### 日志文件说明 + +| 目录/文件 | 内容 | +| -------------- | ------------------------------------------------ | +| `app.log` | 主程序日志,包含文件解析、数据库操作、IPC 通信等 | +| `ai/*.log` | AI 日志,包含 LLM 调用、Agent 执行、工具调用等 | +| `import/*.log` | 导入性能日志,包含导入速度、内存使用、各阶段耗时 | + +## 常见问题 + +### 1. 导入失败 + +**症状**:拖入文件后提示解析失败 + +**排查步骤**: + +1. 确认文件格式是否支持(.json / .jsonl / .txt) +2. 检查文件是否损坏(用文本编辑器打开查看) +3. 查看日志文件中的 `[Parser]` 相关错误 + +### 2. AI 功能无响应 + +**症状**:AI 实验室发送消息后无回复 + +**排查步骤**: + +1. 检查是否已配置 API Key(设置 > AI 设置) +2. 点击「验证」,确认 API 连接正常 +3. 查看日志文件中的 `[LLM]` 或 `[Agent]` 相关错误 + +**常见原因**: + +- API Key 无效或余额不足 +- API 服务商限流 + +### 3. 数据库错误 + +**症状**:打开会话时提示错误 + +**排查步骤**: + +1. 查看日志文件中的 `[Database]` 相关错误 +2. 检查数据库文件是否存在 + +## 反馈问题 + +如果以上方法无法解决问题,请: + +1. 收集日志文件 +2. 描述问题复现步骤 +3. 提交 Issue 到 GitHub + +**提交 Issue 时请包含**: + +- 操作系统及版本 +- ChatLab 版本 +- 问题描述及复现步骤 +- 相关日志片段(注意脱敏) diff --git a/docs/en/chatlab-format.md b/docs/en/chatlab-format.md new file mode 100644 index 00000000..7444cf01 --- /dev/null +++ b/docs/en/chatlab-format.md @@ -0,0 +1,5 @@ +# ChatLab Format Specification + +This entry has moved. + +Please refer to [ChatLab Format Specification](./standard/chatlab-format.md). diff --git a/docs/en/index.md b/docs/en/index.md new file mode 100644 index 00000000..910beecd --- /dev/null +++ b/docs/en/index.md @@ -0,0 +1,8 @@ +--- +layout: page +sidebar: false +title: ChatLab — Social Memory Agent +titleTemplate: false +--- + + diff --git a/docs/en/intro.md b/docs/en/intro.md new file mode 100644 index 00000000..d1ff5206 --- /dev/null +++ b/docs/en/intro.md @@ -0,0 +1,32 @@ +--- +outline: deep +--- + +# Introduction + +ChatLab is a free, open-source, local-first chat analysis app. + +In the digital era, chat histories are no longer just plain text. 
They carry a decade of social connections, precious voice notes from loved ones, and serve as the emotional memory we keep in the digital world. + +ChatLab exists to **help every user safely analyze and revisit their own social memories.** + +## How to use + +To analyze your chat history, you only need two steps: + +1. Export your chat records +2. Import your chat records + +Since each app exports differently, see [Export Chat Records](./usage/how-to-export.md) for detailed steps. + +## Next steps + + + +If you run into import or AI issues, check [Troubleshooting](./usage/troubleshooting.md). + +If your exported format isn't supported yet, see the [AI Conversion Guide](./standard/ai-converter.md). + +If you're a developer who supports exporting from other chat apps, you're welcome to align with the [Chat Data Exchange Standard](./standard/chatlab-format.md). + +For any other questions, join the community: [Community](./other/community.md) diff --git a/docs/en/other/community.md b/docs/en/other/community.md new file mode 100644 index 00000000..bb2096a0 --- /dev/null +++ b/docs/en/other/community.md @@ -0,0 +1,15 @@ +# Contact & Community + +Reach out via community channels or email to get updates and share feedback. + +## Discord + +[Click invite link](https://discord.gg/YPdkDM7sfy) to join the discussion group + +## Twitter (X) + +[https://x.com/chatlabfun](https://x.com/chatlabfun) + +## Contact Developer + +For any other questions, please contact the developer at hello@digua.me. diff --git a/docs/en/quick-start.md b/docs/en/quick-start.md new file mode 100644 index 00000000..69566995 --- /dev/null +++ b/docs/en/quick-start.md @@ -0,0 +1,29 @@ +--- +outline: deep +--- + +# Quick Start + +This page tells you what the docs can help with and gives the shortest reading path so you can go from export to analysis quickly. 
+ +## What the docs cover + +- Choose the right export method and confirm supported formats +- Import chat records and prepare data correctly +- Configure AI models and avoid common pitfalls +- Troubleshoot issues with clear, actionable steps + +## Fastest path + +::: tip Suggested path +1. [Export Chat Records](./usage/how-to-export.md) +2. [Import Chat Records](./usage/how-to-import.md) +3. [How to Configure AI](./usage/how-to-config-ai.md) +4. [Troubleshooting](./usage/troubleshooting.md) +::: + +## Useful links + +- [Standard Format Specification](./standard/chatlab-format.md) +- [AI Conversion Guide](./standard/ai-converter.md) +- [Community](./other/community.md) diff --git a/docs/en/standard/ai-converter.md b/docs/en/standard/ai-converter.md new file mode 100644 index 00000000..2f2c128e --- /dev/null +++ b/docs/en/standard/ai-converter.md @@ -0,0 +1,88 @@ +--- +outline: deep +--- + +# AI Conversion Guide + +If your chat record format (such as CSV, HTML, TXT, or other database exports) is not directly supported by ChatLab, you can use AI (like ChatGPT, Claude, DeepSeek, etc.) to quickly write a conversion script to transform your data into ChatLab's standard format. + +## Preparation + +1. **Download the standard specification**: Click to download ChatLab Standard Format Specification v0.0.1 (.md) +2. **Prepare your data**: Have your exported original chat record file ready (if using online services, we recommend providing only a few hundred anonymized samples). + +## Choose Target Format + +Select the appropriate prompt based on your data size. + +### Scenario 1: Small to Medium Data (Recommended) + +- **Target Format**: JSON (`.json`) +- **Use Case**: Less than 1 million records, file size < 100MB. +- **Features**: Clear structure, best compatibility. + +#### Copy JSON Conversion Prompt + +```markdown +**Role Setting**: +You are an expert in data processing and script writing. 
+ +**Task Objective**: +Based on the [ChatLab Standard Format Specification] (chatlab-format.md) I provide, please write a script to convert my uploaded [original chat records] into the compliant **JSON format**. + +**Requirements**: + +1. **Analyze Structure**: Analyze the text patterns or data structure of the original chat records. +2. **Field Mapping**: + - Map original fields to ChatLab standard fields (`timestamp`, `sender`, `content`, `type`, etc.). + - If the original data lacks `sender` (user ID), please automatically generate a unique hash or virtual ID based on `accountName` (username). + - Default `type` to 0 (text). If you can identify images, voice, or other types from the content, please try to map them. +3. **Script Generation**: + - Please write a **complete, executable script** (Python or Node.js recommended). + - **Output Structure**: The script should build a complete JSON object containing `chatlab`, `meta`, `members`, `messages`, and write it to a file at once. + - The script should include necessary error handling and print progress. +4. **Result Validation**: + - Ensure the generated JSON structure strictly conforms to the definitions in `chatlab-format.md`. + +**Output**: +Please provide the code directly and briefly explain how to run the script. +``` + +### Scenario 2: Very Large Data + +- **Target Format**: JSONL (`.jsonl`) +- **Use Case**: More than 1 million records, or very large file size. +- **Features**: Streaming read/write, extremely low memory usage, won't crash due to large data volumes. + +#### Copy JSONL Conversion Prompt + +```markdown +**Role Setting**: +You are an expert in big data processing and stream computing. + +**Task Objective**: +Based on the [ChatLab Standard Format Specification] (chatlab-format.md) I provide, please write a script to convert my uploaded [original chat records] into the compliant **JSONL (JSON Lines) format**. + +**Requirements**: + +1. 
**Analyze Structure**: Analyze the text patterns of the original chat records. +2. **Stream Processing**: + - **Must use streaming read/write** (Line-by-Line) approach; do not load all data into memory at once. + - Read the original file line by line, write to the target file line by line. +3. **JSONL Structure Requirements**: + - **First line**: Must write the `_type: "header"` line (containing `chatlab` and `meta` information). + - **Member information**: If possible, scan once or collect member information during processing, write `_type: "member"` lines. + - **Message records**: Each chat record writes one `_type: "message"` line. +4. **Script Generation**: + - Please write an **efficient Python script**. + - Ensure constant memory usage during processing, suitable for GB-level large files. + +**Output**: +Please provide the code directly and briefly explain how to run the script. +``` + +## Next Steps + +1. **Run the script**: Run the AI-generated script in your local environment. +2. **Check results**: Open the generated file and confirm the format is correct. +3. **Import to ChatLab**: Import the generated file into ChatLab for analysis. diff --git a/docs/en/standard/chatlab-format.md b/docs/en/standard/chatlab-format.md new file mode 100644 index 00000000..b60f137b --- /dev/null +++ b/docs/en/standard/chatlab-format.md @@ -0,0 +1,314 @@ +--- +outline: deep +--- + +# ChatLab Standard Format Specification v0.0.1 + +ChatLab defines a standard chat record data exchange format to support unified import and analysis of multi-platform data. + +As long as you convert your chat records to this format, ChatLab can parse and analyze them. + +📥 Click here to download this specification (.md) + +::: warning Notice +This format specification is still in its early development stage. Some fields and structures may be adjusted in future versions. 
+::: + +## Overview + +### Supported File Formats + +| Format | Extension | Use Case | +| --------- | --------- | ----------------------------------------------------------- | +| **JSON** | `.json` | Small to medium records (<1 million), clear structure | +| **JSONL** | `.jsonl` | Very large records (>1 million), streaming, constant memory | + +### Format Comparison + +| Feature | JSON | JSONL | +| --------------- | ------------------------------- | ------------------------------- | +| Memory Usage | Requires loading full structure | Line-by-line, constant (~100MB) | +| File Size Limit | ~1GB (depends on memory) | No practical limit | +| Append Writing | Requires rewriting entire file | ✅ Direct line append | +| Error Recovery | Single error invalidates file | Can skip error lines | +| Readability | ⭐⭐⭐ Easy to read | ⭐⭐ One record per line | +| Recommended For | Small/medium (<1M records) | Large (>1M records) | + +## Quick Start + +Here's a **minimal** ChatLab format example with only required fields: + +```json +{ + "chatlab": { + "version": "0.0.1", + "exportedAt": 1703001600 + }, + "meta": { + "name": "My Group Chat", + "platform": "qq", + "type": "group" + }, + "members": [ + { + "platformId": "123456", + "accountName": "John" + } + ], + "messages": [ + { + "sender": "123456", + "accountName": "John", + "timestamp": 1703001600, + "type": 0, + "content": "Hello everyone!" 
+ } + ] +} +``` + +--- + +## JSON Format Detailed Specification + +### File Header (chatlab) + +| Field | Type | Required | Description | +| ------------- | ------ | -------- | --------------------------------------- | +| `version` | string | ✅ | Format version, currently `"0.0.1"` | +| `exportedAt` | number | ✅ | Export time (Unix timestamp in seconds) | +| `generator` | string | - | Generator tool name | +| `description` | string | - | Description | + +### Metadata (meta) + +| Field | Type | Required | Description | +| ------------- | ------------- | -------- | ------------------------------------------------------------------- | +| `name` | string | ✅ | Group name or conversation name | +| `platform` | string | ✅ | Platform identifier: `qq` / `wechat` / `discord` / `whatsapp`, etc. | +| `type` | string | ✅ | Chat type: `group` / `private` | +| `groupId` | string | - | Group ID (group chat only) | +| `groupAvatar` | string | - | Group avatar (Data URL format) | + +### Members (members) + +| Field | Type | Required | Description | +| --------------- | -------- | -------- | ----------------------------- | +| `platformId` | string | ✅ | Unique user identifier | +| `accountName` | string | ✅ | Account name | +| `groupNickname` | string | - | Group nickname (group only) | +| `aliases` | string[] | - | User-defined aliases | +| `avatar` | string | - | User avatar (Data URL format) | + +### Messages (messages) + +| Field | Type | Required | Description | +| --------------- | -------------- | -------- | ------------------------------------- | +| `sender` | string | ✅ | Sender's `platformId` | +| `accountName` | string | ✅ | Account name when sending | +| `groupNickname` | string | - | Group nickname when sending | +| `timestamp` | number | ✅ | Unix timestamp in seconds | +| `type` | number | ✅ | Message type (see table below) | +| `content` | string \| null | ✅ | Message content (`null` for non-text) | + +--- + +## Message Type Reference + +::: tip Tip +If you have 
other special types in your chat records that need support, please submit an issue explaining your situation. We'll evaluate whether to add them to the standard message types. +::: + +### Basic Message Types (0-19) + +| Value | Name | Description | +| ----- | -------- | ------------- | +| 0 | TEXT | Text message | +| 1 | IMAGE | Image | +| 2 | VOICE | Voice | +| 3 | VIDEO | Video | +| 4 | FILE | File | +| 5 | EMOJI | Emoji/Sticker | +| 7 | LINK | Link/Card | +| 8 | LOCATION | Location | + +### Interactive Message Types (20-39) + +| Value | Name | Description | +| ----- | ---------- | --------------------------------- | +| 20 | RED_PACKET | Red packet | +| 21 | TRANSFER | Transfer | +| 22 | POKE | Poke/Nudge | +| 23 | CALL | Voice/Video call | +| 24 | SHARE | Share (music, mini program, etc.) | +| 25 | REPLY | Quote reply | +| 26 | FORWARD | Forward message | +| 27 | CONTACT | Contact card | + +### System Message Types (80+) + +| Value | Name | Description | +| ----- | ------ | ---------------------------------------- | +| 80 | SYSTEM | System message (join/leave/announcement) | +| 81 | RECALL | Recalled message | +| 99 | OTHER | Other/Unknown | + +## Avatar Format + +The `avatar` and `groupAvatar` fields use **Data URL** format: + +``` +data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD... +``` + +Supported image formats: + +- `image/jpeg` - JPEG format (recommended, smaller size) +- `image/png` - PNG format +- `image/gif` - GIF format +- `image/webp` - WebP format + +::: tip Suggestion +When exporting, we recommend compressing avatars to 100×100 pixels or less to reduce file size. 
+::: + +## Complete Examples + +### Group Chat Example (with optional fields) + +```json +{ + "chatlab": { + "version": "0.0.1", + "exportedAt": 1703001600, + "generator": "My Converter Tool", + "description": "2024 Tech Exchange Group Chat Backup" + }, + "meta": { + "name": "Tech Exchange Group", + "platform": "wechat", + "type": "group", + "groupId": "38988428513", + "groupAvatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + }, + "members": [ + { + "platformId": "abc123", + "accountName": "John", + "groupNickname": "Admin-John", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + }, + { + "platformId": "def456", + "accountName": "Jane", + "groupNickname": "Moderator", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + } + ], + "messages": [ + { + "sender": "abc123", + "accountName": "John", + "groupNickname": "Admin-John", + "timestamp": 1703001600, + "type": 0, + "content": "Hello everyone! Welcome to the Tech Exchange Group~" + }, + { + "sender": "def456", + "accountName": "Jane", + "groupNickname": "Moderator", + "timestamp": 1703001610, + "type": 1, + "content": "[Image: screenshot.jpg]" + } + ] +} +``` + +### Private Chat Example + +```json +{ + "chatlab": { + "version": "0.0.1", + "exportedAt": 1703001600 + }, + "meta": { + "name": "Conversation with Mike", + "platform": "qq", + "type": "private" + }, + "members": [ + { + "platformId": "123456789", + "accountName": "Me", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + }, + { + "platformId": "987654321", + "accountName": "Mike", + "avatar": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." + } + ], + "messages": [ + { + "sender": "123456789", + "accountName": "Me", + "timestamp": 1703001600, + "type": 0, + "content": "Hey, are you there?" + } + ] +} +``` + +## JSONL Streaming Format + +JSONL (JSON Lines) format is suitable for **very large chat records** (>1 million messages) to avoid memory overflow issues. 
+ +### Format Features + +- One JSON object per line +- Distinguish line types by `_type` field: `header` / `member` / `message` +- Constant memory usage (~100MB), supports GB-level files +- Supports streaming writes, can append while exporting + +### Line Type Description + +| `_type` | Description | Required | +| --------- | ------------------------------------- | --------------------- | +| `header` | File header with `chatlab` and `meta` | ✅ Must be first line | +| `member` | Member information | - Optional | +| `message` | Message record | ✅ At least one | + +### Complete Example + +```jsonl +{"_type":"header","chatlab":{"version":"0.0.1","exportedAt":1703001600},"meta":{"name":"Tech Exchange Group","platform":"qq","type":"group"}} +{"_type":"member","platformId":"123456","accountName":"John","groupNickname":"Admin"} +{"_type":"member","platformId":"789012","accountName":"Jane"} +{"_type":"message","sender":"123456","accountName":"John","groupNickname":"Admin","timestamp":1703001600,"type":0,"content":"Hello everyone!"} +{"_type":"message","sender":"789012","accountName":"Jane","timestamp":1703001610,"type":0,"content":"Hi there!"} +{"_type":"message","sender":"123456","accountName":"John","groupNickname":"Admin","timestamp":1703001620,"type":1,"content":"[Image]"} +``` + +### Parsing Rules + +1. **First line must be header**: Contains `chatlab` version and `meta` information +2. **Member lines before messages**: Optional; if omitted, member info will be collected from messages +3. **Messages sorted by time**: Recommended to sort by `timestamp` in ascending order +4. **Each line is independent**: Single line parsing errors can be skipped +5. 
**Comment lines supported**: Lines starting with `#` are skipped (can be used for notes) + +::: warning Notice + +- Apart from `#` comment lines, each line must be a complete, **valid JSON** object (a record cannot span multiple lines) +- Lines are separated by newline `\n` + ::: + +## Version History + +| Version | Date | Changes | +| ------- | ------- | --------------- | +| 0.0.1 | 2025-12 | Initial version | diff --git a/docs/en/usage/how-to-config-ai.md b/docs/en/usage/how-to-config-ai.md new file mode 100644 index 00000000..c9761e0b --- /dev/null +++ b/docs/en/usage/how-to-config-ai.md @@ -0,0 +1,31 @@ +--- +outline: deep +--- + +# How to Configure AI Models + +## Online AI Models + +Here we use DeepSeek as an example. For other models, please search for their configuration methods: + +1. Visit [DeepSeek official website](https://www.deepseek.com/), select API Platform, register and log in + +![](/en/img/ai-guide/1.png) + +2. Select "Recharge" on the left side and add credit (we recommend starting with a small amount - it will last a long time) + +![](/en/img/ai-guide/2.png) + +3. Select "API Keys" on the left side, click "Create API key", enter any title, then copy the API Key + +![](/en/img/ai-guide/3.png) + +4. Open ChatLab, click Settings in the bottom right corner, then "Model Configuration" > "Add New Configuration" + +Paste the API Key you copied into the API Keys field, click Verify to confirm it works, then click Add in the bottom right corner to start using AI features + +![](/en/img/ai-guide/4.png) + +5. In the usage information on the left, you can check your credit balance and consumption + +![](/en/img/ai-guide/5.png) diff --git a/docs/en/usage/how-to-export.md b/docs/en/usage/how-to-export.md new file mode 100644 index 00000000..b5518d55 --- /dev/null +++ b/docs/en/usage/how-to-export.md @@ -0,0 +1,100 @@ +--- +outline: deep +--- + +# Export Chat Records Guide + +ChatLab focuses on analyzing exported data - we don't provide data extraction features. 
You'll need to first use official features or third-party tools from the open-source community to export your chat records, then import them into ChatLab for analysis. + +Tip: You're welcome to join the [ChatLab Community](../other/community.md) to discuss issues and share feedback. + +::: danger Important Notice +**When using third-party export tools, please carefully read their official documentation, privacy policies, and security instructions.** + +**ChatLab has no direct or indirect association with the projects listed below. These links are provided solely as technical information for your reference and do not imply any endorsement, guarantee, or warranty of their safety by ChatLab. Users must evaluate and assume all risks associated with using these third-party tools.** +::: + +## WhatsApp + +For WhatsApp, we currently support the official "Export Chat" feature. + +Exports in English and Chinese are currently supported. If you need another language, please contact the developer. + +- **Export Method**: + 1. Open WhatsApp and go to the conversation you want to export. + 2. Tap the contact name at the top -> Export Chat. + 3. Select "Without Media". +- **Format**: Extract the `txt` file from the exported `.zip` package and drag the `txt` file into ChatLab. + +## Discord + +For Discord, we currently support the JSON format exported by **DiscordChatExporter**. + +- **Project URL**: [https://github.com/Tyrrrz/DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter) +- **Supported Platforms**: Windows / macOS / Linux +- **Usage Guide**: Refer to the project README. +- **Tip**: Please make sure to select **JSON** as the export format for ChatLab to parse correctly. + +## Instagram + +For Instagram, we currently support the official export feature. + +- **Export Method**: + 1. Open the Instagram app or web version, go to "Settings". + 2. Click "Accounts Center" -> "Your information and permissions" -> "Download your information". + 3. 
Select "Some of your information", then check "Messages". + 4. Select format as **JSON**, and date range as "All time". + 5. Click "Submit request" and wait for Instagram to process, then download. +- **Format**: After extracting the downloaded archive, find the `message_1.json` file in the `your_instagram_activity/messages/inbox/` directory for the corresponding chat, and drag it into ChatLab. +- **Tip**: If the conversation has a lot of content, there may be multiple `message_*.json` files. We recommend importing them one by one. + +## iMessage + +iMessage is not supported yet. We plan to support the JSON format exported by [imessage-exporter](https://github.com/ReagentX/imessage-exporter). + +However, the developer currently doesn't have data samples for testing. If you have an urgent need, please provide anonymized data samples and we'll support it as soon as possible. + +## LINE + +For LINE, we currently support the official chat export feature. + +- **Export Method**: + 1. Open LINE and go to the conversation you want to export. + 2. On mobile: tap the menu in the top-right corner of the chat -> Settings -> Export chat history. + 3. On desktop (Windows / macOS): open Chats, enter the target conversation, then click the top-right menu -> Save chat. + 4. Save or share the exported text file. +- **Format**: Drag the exported `.txt` file directly into ChatLab. +- **Tip**: According to LINE's official help, the desktop app only saves messages that are currently loaded and visible in the chat window. + +## Telegram + +For Telegram, we currently support the official export feature provided by Telegram Desktop. + +- **Export Method**: + 1. Open the latest version of Telegram Desktop. + 2. Go to `Settings` -> `Advanced` -> `Export Telegram data`. + 3. In the export panel, select the chats you want to export. + 4. Choose **Machine-readable JSON** as the format. If you also want a readable copy, you can choose **Both**, but JSON must be included. + 5. Choose the export folder and wait for Telegram to finish processing. 
+- **Format**: Import the main JSON file from the export folder (usually `result.json`) into ChatLab. +- **Tip**: Telegram's official export feature is in the desktop app. For some accounts, the first export request may be delayed for security reasons and must be completed later on the same device. + +## Q&A: Can I analyze chat records from other chat applications? + +We receive many requests to support other chat applications, so here is one general answer: + +ChatLab **analyzes exported chat records in fixed text formats**, and it assumes that **you have already exported chat records through legal and compliant channels**. + +We **do not provide any decryption, packet capture, or export tools and scripts**. We only support compatibility with exported chat record formats. As long as you can provide anonymized chat record text samples, we can try to support analysis. + +If you have some technical background, you can try using **AI-assisted conversion** to convert your data to the standard format. For details, please check the [AI Conversion Guide](../standard/ai-converter.md). + +Additionally, if you're a developer and have already built a chat record export tool for another chat application, you're welcome to [make it compatible with ChatLab format](../standard/chatlab-format.md), and we'll add your GitHub link here. + +## ⚠️ Legal & Security Disclaimer + +Before attempting to analyze data from the above applications, please be aware: + +- **Legal Authorization Principle**: You may only process chat records that **you personally participated in**. If privacy of others is involved, please ensure you have obtained informed consent from the relevant parties. +- **Prohibited Illegal Use**: It is strictly forbidden to use this software for stealing, monitoring, or analyzing unauthorized private information of others, or for any behavior that infringes on others' rights. +- **Your Own Responsibility for Compliance**: Obtaining data from third-party platforms is your own act and your own responsibility. 
If your analysis violates the original data source platform's terms of service resulting in account restrictions or other consequences, ChatLab assumes no responsibility. +- **No Commercial Use**: It is strictly forbidden for any individual or organization to use this software or analysis results for any form of commercial profit. +- **Result Accuracy**: Analysis results generated by the software may contain errors or "hallucinations" and are for technical reference only. They should not be used as legal evidence or decision-making basis. diff --git a/docs/en/usage/how-to-import.md b/docs/en/usage/how-to-import.md new file mode 100644 index 00000000..be10f93b --- /dev/null +++ b/docs/en/usage/how-to-import.md @@ -0,0 +1,18 @@ +--- +outline: deep +--- + +# Import Chat Records Guide + +After completing the export, you simply need to: + +1. Drag the exported **data file** directly into the upload area on ChatLab's homepage. +2. Wait for ChatLab to finish parsing. + +## Bug Troubleshooting + +If the import fails, you can quickly troubleshoot the issue through logs: + +Go to "Settings" in the bottom left corner > "Basic Settings" > "Log Files", and open that directory. Inside, there's an "import" folder containing all import log records. + +If you can't understand the logs, you can submit an issue on GitHub. diff --git a/docs/en/usage/index.md b/docs/en/usage/index.md new file mode 100644 index 00000000..ae639aa5 --- /dev/null +++ b/docs/en/usage/index.md @@ -0,0 +1,5 @@ +--- +outline: deep +--- + +# USAGE \ No newline at end of file diff --git a/docs/en/usage/troubleshooting.md b/docs/en/usage/troubleshooting.md new file mode 100644 index 00000000..dd8f9edb --- /dev/null +++ b/docs/en/usage/troubleshooting.md @@ -0,0 +1,81 @@ +--- +outline: deep +--- + +# Troubleshooting Guide + +This document helps users and developers troubleshoot issues encountered when using ChatLab. 
+ +## Log Files + +**Access log files**: Bottom left corner **"Settings" > "Storage Management" > "Log Files" > Open Directory** + +Logs are stored in the `Documents/ChatLab/logs/` directory: + +``` +ChatLab/logs/ +├── app.log # Main program log +├── ai/ # AI-related logs +│ └── ai_YYYY-MM-DD_HH-mm.log +└── import/ # Import logs + └── import_{sessionId}_{timestamp}.log +``` + +### Log File Description + +| Directory/File | Contents | +| -------------- | ------------------------------------------------- | +| `app.log` | Main program log: file parsing, database ops, IPC | +| `ai/*.log` | AI logs: LLM calls, Agent execution, tool calls | +| `import/*.log` | Import performance: speed, memory usage, timing | + +## Common Issues + +### 1. Import Failed + +**Symptoms**: Parsing error after dragging in a file + +**Troubleshooting Steps**: + +1. Confirm the file format is supported (.json / .jsonl / .txt) +2. Check if the file is corrupted (open with a text editor) +3. Check `[Parser]` related errors in the log files + +### 2. AI Features Not Responding + +**Symptoms**: No response after sending a message in AI Lab + +**Troubleshooting Steps**: + +1. Check if API Key is configured (Settings > AI Settings) +2. Click "Verify" to confirm API connection is working +3. Check `[LLM]` or `[Agent]` related errors in the log files + +**Common Causes**: + +- Invalid API Key or insufficient balance +- API provider rate limiting + +### 3. Database Error + +**Symptoms**: Error when opening a session + +**Troubleshooting Steps**: + +1. Check `[Database]` related errors in the log files +2. Verify the database file exists + +## Reporting Issues + +If the above methods don't solve your problem, please: + +1. Collect log files +2. Describe the steps to reproduce the issue +3. 
Submit an Issue on GitHub + +**When submitting an Issue, please include**: + +- Operating system and version +- ChatLab version +- Problem description and reproduction steps +- Relevant log snippets (remember to anonymize sensitive data) diff --git a/docs/env.d.ts b/docs/env.d.ts new file mode 100644 index 00000000..83a0b1f3 --- /dev/null +++ b/docs/env.d.ts @@ -0,0 +1,4 @@ +declare module "*.md?raw" { + const content: string; + export default content; +}