mirror of
https://github.com/hellodigua/ChatLab.git
synced 2026-05-22 06:10:37 +08:00
207 lines
5.3 KiB
TypeScript
207 lines
5.3 KiB
TypeScript
/**
 * FTS5 全文搜索索引管理模块
 *
 * 使用 Contentless FTS5 虚拟表为 message.content 提供倒排索引。
 * 通过应用层预分词(jieba + Intl.Segmenter),空格分隔后存入 FTS5。
 *
 * DELETE 策略:不在 FTS 做同步删除,依赖 JOIN message 自然过滤。
 * 提供 rebuildFtsIndex() 用于手动清理无效条目。
 */
|
|
|
|
import Database from 'better-sqlite3'
|
|
import { getDbPath, openDatabase } from '../core'
|
|
import { tokenizeForFts, tokenizeQueryForFts } from '../../nlp/ftsTokenizer'
|
|
|
|
/** Maximum number of message rows read and written per transaction during a full index build. */
const BATCH_SIZE = 5000

/**
 * DDL for the `message_fts` FTS5 virtual table.
 *
 * `content=''` declares the table contentless: only the inverted index is
 * stored, not the indexed text itself. `IF NOT EXISTS` makes the statement
 * safe to execute repeatedly.
 *
 * NOTE(review): `content_rowid` is documented by SQLite for external-content
 * tables; confirm it has any effect alongside `content=''`.
 */
const CREATE_FTS_TABLE_SQL = `
  CREATE VIRTUAL TABLE IF NOT EXISTS message_fts USING fts5(
    content,
    content='',
    content_rowid=id
  )
`
|
|
|
|
/**
 * Opens a dedicated read-write connection to a session's database for FTS writes.
 *
 * A new `Database` handle is created on every call (it does not go through
 * `openDatabase`), so the caller owns the handle and must `close()` it.
 *
 * @param sessionId - Session whose database file path is resolved via `getDbPath`.
 * @returns An open connection with `journal_mode = WAL` requested, or `null`
 *   when the file cannot be opened or the pragma fails.
 */
function openWritableDb(sessionId: string): Database.Database | null {
  const dbPath = getDbPath(sessionId)

  let db: Database.Database
  try {
    db = new Database(dbPath)
  } catch {
    return null
  }

  try {
    db.pragma('journal_mode = WAL')
    return db
  } catch {
    // The handle is already open at this point; release it before reporting
    // failure so a pragma error does not leak the connection.
    db.close()
    return null
  }
}
|
|
|
|
/**
 * Reports whether the session's database already contains the `message_fts` table.
 *
 * @param sessionId - Session whose database is obtained through `openDatabase`.
 * @returns `true` when `sqlite_master` lists a table named `message_fts`;
 *   `false` when the database is unavailable, the table is absent, or the
 *   lookup throws.
 */
export function hasFtsIndex(sessionId: string): boolean {
  const db = openDatabase(sessionId)
  if (!db) {
    return false
  }

  let found: { name: string } | undefined
  try {
    const lookup = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='message_fts'")
    found = lookup.get() as { name: string } | undefined
  } catch {
    // Any lookup failure is reported as "no index".
    return false
  }

  return Boolean(found)
}
|
|
|
|
/**
 * Ensures the FTS virtual table exists by executing `CREATE_FTS_TABLE_SQL`
 * on the given connection.
 *
 * @param db - Open connection on which the DDL is executed.
 */
export function createFtsTable(db: Database.Database): void {
  const ddl = CREATE_FTS_TABLE_SQL
  db.exec(ddl)
}
|
|
|
|
/**
 * Builds the FTS index for every eligible message of a session, in batches.
 *
 * Intended for a full build after a migration or a first import. Rows are
 * read from `message` where `type = 0` and `content` is non-empty, tokenized
 * with `tokenizeForFts`, and inserted into `message_fts` keyed by the message
 * `id`. Each batch of at most `BATCH_SIZE` rows is written in its own
 * transaction.
 *
 * Batches are paged by key (`id > lastId`) instead of `OFFSET`, so each query
 * resumes where the previous one stopped rather than rescanning skipped rows.
 * This assumes `message.id` is unique, which its use as the FTS rowid
 * already requires.
 *
 * @param sessionId - Session whose database is indexed.
 * @returns `indexed`: the number of message rows processed. Rows whose
 *   tokenization is empty are counted but not inserted. `0` when the database
 *   cannot be opened.
 * @throws Propagates database errors (e.g. a missing `message` table); the
 *   connection is closed in all cases.
 */
export function buildFtsIndex(sessionId: string): { indexed: number } {
  const db = openWritableDb(sessionId)
  if (!db) return { indexed: 0 }

  try {
    createFtsTable(db)

    // Statements and the transaction wrapper are prepared once and reused for every batch.
    const insertFts = db.prepare('INSERT INTO message_fts(rowid, content) VALUES (?, ?)')
    const selectBatch = db.prepare(
      `SELECT id, content FROM message
       WHERE type = 0 AND content IS NOT NULL AND content != '' AND id > ?
       ORDER BY id ASC
       LIMIT ?`
    )
    const insertBatch = db.transaction((rows: Array<{ id: number; content: string }>) => {
      for (const row of rows) {
        const tokens = tokenizeForFts(row.content)
        // Skip rows that tokenize to nothing; there is nothing to index.
        if (tokens) {
          insertFts.run(row.id, tokens)
        }
      }
    })

    let indexed = 0
    // Lower bound below any id representable as a JS number, so the first batch starts at the smallest id.
    let lastId = Number.MIN_SAFE_INTEGER

    for (;;) {
      const rows = selectBatch.all(lastId, BATCH_SIZE) as Array<{ id: number; content: string }>
      const last = rows[rows.length - 1]
      if (!last) break

      insertBatch(rows)
      indexed += rows.length
      lastId = last.id

      // A short batch means the table is exhausted; avoid one extra empty query.
      if (rows.length < BATCH_SIZE) break
    }

    return { indexed }
  } finally {
    db.close()
  }
}
|
|
|
|
/**
 * Rebuilds the FTS index from scratch: drops `message_fts` if present, then
 * runs a full `buildFtsIndex`.
 *
 * Used to purge stale entries (for example after members were deleted) or to
 * repair the index.
 *
 * The drop and the rebuild are separate steps, so a failure during the rebuild
 * leaves the session without an index until the next successful build.
 *
 * @param sessionId - Session whose index is rebuilt.
 * @returns The result of `buildFtsIndex`, or `{ indexed: 0 }` when the database
 *   cannot be opened or any step fails (the failure is logged).
 */
export function rebuildFtsIndex(sessionId: string): { indexed: number } {
  const db = openWritableDb(sessionId)
  if (!db) return { indexed: 0 }

  try {
    try {
      db.exec('DROP TABLE IF EXISTS message_fts')
    } finally {
      // Close exactly once, before buildFtsIndex opens its own connection.
      db.close()
    }

    return buildFtsIndex(sessionId)
  } catch (error) {
    console.error('[FTS] Rebuild failed, session:', sessionId, error)
    return { indexed: 0 }
  }
}
|
|
|
|
/**
 * Writes a batch of messages into the FTS index, for use during incremental
 * imports.
 *
 * Entries with empty or `null` content, and entries whose tokenization is
 * empty, are skipped. All inserts run inside a single transaction. If the
 * `message_fts` table does not exist yet, nothing is written.
 *
 * @param sessionId - Session whose database receives the entries.
 * @param entries - Message ids with their raw content; the id becomes the FTS rowid.
 * @throws Propagates database errors raised while inserting; the connection is
 *   closed in all cases.
 */
export function insertFtsEntries(sessionId: string, entries: Array<{ id: number; content: string | null }>): void {
  // Nothing to write: avoid opening a connection at all.
  if (entries.length === 0) return

  const db = openWritableDb(sessionId)
  if (!db) return

  try {
    const hasTable = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='message_fts'").get()
    // No index has been built for this session; `finally` releases the connection.
    if (!hasTable) return

    const insertFts = db.prepare('INSERT INTO message_fts(rowid, content) VALUES (?, ?)')

    const batchInsert = db.transaction(() => {
      for (const entry of entries) {
        if (!entry.content) continue

        const tokens = tokenizeForFts(entry.content)
        if (tokens) {
          insertFts.run(entry.id, tokens)
        }
      }
    })
    batchInsert()
  } finally {
    db.close()
  }
}
|
|
|
|
/**
 * Searches the FTS5 index and returns the rowids of matching messages.
 *
 * The keywords are converted to an FTS5 MATCH expression by
 * `tokenizeQueryForFts`. Results are ordered by the FTS5 `rank` column and
 * paged with `limit` / `offset`. `total` is counted on `message_fts` alone;
 * this function does not join against `message`.
 *
 * @param sessionId - Session whose database is obtained through `openDatabase`.
 * @param keywords - Search terms; an empty array yields an empty result.
 * @param limit - Maximum number of rowids to return (default 1000).
 * @param offset - Number of ranked matches to skip (default 0).
 * @returns The page of matching rowids and the total match count. An empty
 *   result is returned when the database is unavailable, the query tokenizes
 *   to nothing, or the search throws (the error is logged).
 */
export function searchByFts(
  sessionId: string,
  keywords: string[],
  limit: number = 1000,
  offset: number = 0
): { rowids: number[]; total: number } {
  const noHits = (): { rowids: number[]; total: number } => ({ rowids: [], total: 0 })

  if (keywords.length === 0) {
    return noHits()
  }

  const db = openDatabase(sessionId)
  if (!db) {
    return noHits()
  }

  const matchQuery = tokenizeQueryForFts(keywords)
  if (!matchQuery) {
    return noHits()
  }

  try {
    const countStmt = db.prepare('SELECT COUNT(*) as total FROM message_fts WHERE content MATCH ?')
    const countRow = countStmt.get(matchQuery) as { total: number }

    const pageStmt = db.prepare(`SELECT rowid FROM message_fts WHERE content MATCH ? ORDER BY rank LIMIT ? OFFSET ?`)
    const hits = pageStmt.all(matchQuery, limit, offset) as Array<{ rowid: number }>

    const rowids: number[] = []
    for (const hit of hits) {
      rowids.push(hit.rowid)
    }

    return { rowids, total: countRow.total }
  } catch (error) {
    console.error('[FTS] Search failed, query:', matchQuery, error)
    return noHits()
  }
}
|