Files
ChatLab/electron/main/worker/query/fts.ts
T
2026-04-06 22:24:49 +08:00

207 lines
5.3 KiB
TypeScript

/**
* FTS5 全文搜索索引管理模块
*
* 使用 Contentless FTS5 虚拟表为 message.content 提供倒排索引。
* 通过应用层预分词(jieba + Intl.Segmenter),空格分隔后存入 FTS5。
*
* DELETE 策略:不在 FTS 做同步删除,依赖 JOIN message 自然过滤。
* 提供 rebuildFtsIndex() 用于手动清理无效条目。
*/
import Database from 'better-sqlite3'
import { getDbPath, openDatabase } from '../core'
import { tokenizeForFts, tokenizeQueryForFts } from '../../nlp/ftsTokenizer'
// Maximum number of message rows read and written per batch while building the index.
const BATCH_SIZE = 5000
// DDL for the `message_fts` FTS5 virtual table with a single `content` column.
// `content=''` declares the table contentless: only the inverted index is stored,
// not the indexed text itself.
// NOTE(review): `content_rowid` is documented for external-content tables; confirm
// whether it has any effect when combined with `content=''`.
const CREATE_FTS_TABLE_SQL = `
CREATE VIRTUAL TABLE IF NOT EXISTS message_fts USING fts5(
content,
content='',
content_rowid=id
)
`
/**
 * Opens a dedicated writable connection to a session database for FTS writes.
 *
 * The connection is created directly with `new Database(...)` rather than
 * through the shared `openDatabase` helper, and is switched to WAL journal
 * mode. The caller owns the returned handle and must close it.
 *
 * @param sessionId - Session whose database path is resolved via `getDbPath`.
 * @returns The open connection, or `null` when opening or configuring it fails.
 */
function openWritableDb(sessionId: string): Database.Database | null {
  const dbPath = getDbPath(sessionId)
  let db: Database.Database | undefined
  try {
    db = new Database(dbPath)
    db.pragma('journal_mode = WAL')
    return db
  } catch {
    // The pragma can fail after the handle was already opened; release the
    // handle so it is not leaked when we report failure.
    if (db) {
      db.close()
    }
    return null
  }
}
/**
 * Reports whether the session database already contains the `message_fts` table.
 *
 * @param sessionId - Session whose database is obtained through `openDatabase`.
 * @returns `true` when `sqlite_master` lists a table named `message_fts`;
 *   `false` when it does not, when the database is unavailable, or when the
 *   lookup throws.
 */
export function hasFtsIndex(sessionId: string): boolean {
  const db = openDatabase(sessionId)
  if (!db) {
    return false
  }

  let found: { name: string } | undefined
  try {
    const lookup = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='message_fts'")
    found = lookup.get() as { name: string } | undefined
  } catch {
    // Any failure while querying is treated as "no index".
    return false
  }
  return Boolean(found)
}
/**
 * Creates the `message_fts` FTS5 virtual table if it does not exist yet.
 *
 * @param db - Open connection on which `CREATE_FTS_TABLE_SQL` is executed.
 */
export function createFtsTable(db: Database.Database): void {
db.exec(CREATE_FTS_TABLE_SQL)
}
/**
 * Builds the FTS index for all text messages of a session, batch by batch.
 *
 * Intended for migrations or the first full build after an import. Rows of
 * `message` with `type = 0` and non-empty `content` are read in ascending `id`
 * order, tokenized with `tokenizeForFts`, and inserted into `message_fts`
 * using the message id as rowid. Each batch of up to `BATCH_SIZE` rows is
 * written in its own transaction.
 *
 * Existing entries in `message_fts` are not cleared here; `rebuildFtsIndex`
 * drops the table before calling this function.
 *
 * @param sessionId - Session whose database should be indexed.
 * @returns `indexed`: the number of rows actually written to `message_fts`
 *   (rows whose tokenization is empty are skipped and not counted); `0` when
 *   the writable connection cannot be opened.
 * @throws Propagates any error raised while creating the table, reading, or
 *   inserting; the connection is closed before the error leaves this function.
 */
export function buildFtsIndex(sessionId: string): { indexed: number } {
  type MessageRow = { id: number; content: string }

  const db = openWritableDb(sessionId)
  if (!db) return { indexed: 0 }

  try {
    createFtsTable(db)

    // All statements are prepared once and reused by every batch.
    const insertFts = db.prepare('INSERT INTO message_fts(rowid, content) VALUES (?, ?)')
    const selectFirst = db.prepare(
      `SELECT id, content FROM message
       WHERE type = 0 AND content IS NOT NULL AND content != ''
       ORDER BY id ASC
       LIMIT ?`
    )
    // Keyset pagination: continuing after the last id seen avoids LIMIT/OFFSET,
    // which has to walk over every skipped row again for each batch.
    const selectAfter = db.prepare(
      `SELECT id, content FROM message
       WHERE id > ? AND type = 0 AND content IS NOT NULL AND content != ''
       ORDER BY id ASC
       LIMIT ?`
    )

    // Writes one batch inside a single transaction and reports how many rows
    // produced tokens and were therefore inserted.
    const indexBatch = db.transaction((rows: MessageRow[]): number => {
      let inserted = 0
      for (const row of rows) {
        const tokens = tokenizeForFts(row.content)
        if (tokens) {
          insertFts.run(row.id, tokens)
          inserted += 1
        }
      }
      return inserted
    })

    let indexed = 0
    let cursor: number | undefined
    for (;;) {
      const rows = (
        cursor === undefined ? selectFirst.all(BATCH_SIZE) : selectAfter.all(cursor, BATCH_SIZE)
      ) as MessageRow[]

      const last = rows[rows.length - 1]
      if (last === undefined) break // empty batch: nothing left to index

      indexed += indexBatch(rows)
      cursor = last.id

      // A short batch means the table is exhausted; skip the extra empty query.
      if (rows.length < BATCH_SIZE) break
    }

    return { indexed }
  } finally {
    db.close()
  }
}
/**
 * Rebuilds the FTS index from scratch: drops `message_fts` if present, then
 * runs a full `buildFtsIndex`.
 *
 * Used to purge stale entries (for example after members were deleted) or to
 * repair the index.
 *
 * @param sessionId - Session whose index should be rebuilt.
 * @returns The result of `buildFtsIndex`, or `{ indexed: 0 }` when the database
 *   cannot be opened or any step fails (the failure is logged, not thrown).
 */
export function rebuildFtsIndex(sessionId: string): { indexed: number } {
  const db = openWritableDb(sessionId)
  if (!db) return { indexed: 0 }

  try {
    try {
      db.exec('DROP TABLE IF EXISTS message_fts')
    } finally {
      // Close exactly once, and before buildFtsIndex opens its own connection.
      db.close()
    }
    return buildFtsIndex(sessionId)
  } catch (error) {
    // Best-effort contract: callers get a zero count instead of an exception,
    // but the cause is logged so a failed rebuild is not invisible.
    console.error('[FTS] Rebuild failed, sessionId:', sessionId, error)
    return { indexed: 0 }
  }
}
/**
 * Writes a batch of messages into the FTS index; used to keep the index in
 * sync during incremental imports.
 *
 * Entries with empty or `null` content, and entries whose tokenization is
 * empty, are skipped. All inserts run inside a single transaction. If the
 * `message_fts` table does not exist, nothing is written.
 *
 * @param sessionId - Session whose database receives the entries.
 * @param entries - Message ids (used as FTS rowids) with their raw content.
 * @throws Propagates database errors raised while preparing or inserting; the
 *   connection is closed before the error leaves this function.
 */
export function insertFtsEntries(sessionId: string, entries: Array<{ id: number; content: string | null }>): void {
  // Nothing to write: do not open a writable connection at all.
  if (entries.length === 0) return

  const db = openWritableDb(sessionId)
  if (!db) return

  try {
    const hasTable = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='message_fts'").get()
    // No index has been created for this database; the `finally` below closes
    // the connection.
    if (!hasTable) return

    const insertFts = db.prepare('INSERT INTO message_fts(rowid, content) VALUES (?, ?)')
    const batchInsert = db.transaction(() => {
      for (const entry of entries) {
        if (!entry.content) continue
        const tokens = tokenizeForFts(entry.content)
        if (tokens) {
          insertFts.run(entry.id, tokens)
        }
      }
    })
    batchInsert()
  } finally {
    db.close()
  }
}
/**
 * Runs a full-text search against `message_fts` and returns one page of
 * matching rowids together with the total number of matches.
 *
 * The keywords are converted to an FTS5 MATCH expression by
 * `tokenizeQueryForFts`. Only `message_fts` is queried here; the rowids are
 * not checked against the `message` table in this function.
 *
 * @param sessionId - Session whose database is obtained through `openDatabase`.
 * @param keywords - Search terms; an empty list yields an empty result.
 * @param limit - Maximum number of rowids to return (default 1000).
 * @param offset - Number of ranked matches to skip (default 0).
 * @returns `rowids` ordered by FTS5 `rank`, and `total` matches for the query.
 *   An empty result is returned when there are no keywords, the database is
 *   unavailable, the match expression is empty, or the query fails (the
 *   failure is logged).
 */
export function searchByFts(
  sessionId: string,
  keywords: string[],
  limit: number = 1000,
  offset: number = 0
): { rowids: number[]; total: number } {
  const noMatches = (): { rowids: number[]; total: number } => ({ rowids: [], total: 0 })

  if (keywords.length === 0) {
    return noMatches()
  }

  const db = openDatabase(sessionId)
  if (!db) {
    return noMatches()
  }

  const matchQuery = tokenizeQueryForFts(keywords)
  if (!matchQuery) {
    return noMatches()
  }

  try {
    const countStmt = db.prepare('SELECT COUNT(*) as total FROM message_fts WHERE content MATCH ?')
    const { total } = countStmt.get(matchQuery) as { total: number }

    const pageStmt = db.prepare(`SELECT rowid FROM message_fts WHERE content MATCH ? ORDER BY rank LIMIT ? OFFSET ?`)
    const page = pageStmt.all(matchQuery, limit, offset) as Array<{ rowid: number }>

    const rowids: number[] = []
    for (const hit of page) {
      rowids.push(hit.rowid)
    }
    return { rowids, total }
  } catch (error) {
    console.error('[FTS] Search failed, query:', matchQuery, error)
    return { rowids: [], total: 0 }
  }
}