Files
ChatLab/electron/main/worker/query/advanced/repeat.ts

331 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 复读分析 + 口头禅分析模块
*/
import { openDatabase, buildTimeFilter, type TimeFilter } from '../../core'
// ==================== 复读分析 ====================
/**
* 获取复读分析数据
*/
export function getRepeatAnalysis(sessionId: string, filter?: TimeFilter): any {
const db = openDatabase(sessionId)
const emptyResult = {
originators: [],
initiators: [],
breakers: [],
originatorRates: [],
initiatorRates: [],
breakerRates: [],
chainLengthDistribution: [],
hotContents: [],
avgChainLength: 0,
totalRepeatChains: 0,
}
if (!db) return emptyResult
const { clause, params } = buildTimeFilter(filter)
let whereClause = clause
if (whereClause.includes('WHERE')) {
whereClause += " AND COALESCE(m.account_name, '') != '系统消息' AND msg.type = 0 AND msg.content IS NOT NULL AND TRIM(msg.content) != ''"
} else {
whereClause = " WHERE COALESCE(m.account_name, '') != '系统消息' AND msg.type = 0 AND msg.content IS NOT NULL AND TRIM(msg.content) != ''"
}
const messages = db
.prepare(
`
SELECT
msg.id,
msg.sender_id as senderId,
msg.content,
msg.ts,
m.platform_id as platformId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as name
FROM message msg
JOIN member m ON msg.sender_id = m.id
${whereClause}
ORDER BY msg.ts ASC, msg.id ASC
`
)
.all(...params) as Array<{
id: number
senderId: number
content: string
ts: number
platformId: string
name: string
}>
const originatorCount = new Map<number, number>()
const initiatorCount = new Map<number, number>()
const breakerCount = new Map<number, number>()
const memberMessageCount = new Map<number, number>()
const memberInfo = new Map<number, { platformId: string; name: string }>()
const chainLengthCount = new Map<number, number>()
const contentStats = new Map<
string,
{ count: number; maxChainLength: number; originatorId: number; lastTs: number }
>()
let currentContent: string | null = null
let repeatChain: Array<{ senderId: number; content: string; ts: number }> = []
let totalRepeatChains = 0
let totalChainLength = 0
const fastestRepeaterStats = new Map<number, { totalDiff: number; count: number }>()
const processRepeatChain = (chain: Array<{ senderId: number; content: string; ts: number }>, breakerId?: number) => {
if (chain.length < 3) return
totalRepeatChains++
const chainLength = chain.length
totalChainLength += chainLength
const originatorId = chain[0].senderId
originatorCount.set(originatorId, (originatorCount.get(originatorId) || 0) + 1)
const initiatorId = chain[1].senderId
initiatorCount.set(initiatorId, (initiatorCount.get(initiatorId) || 0) + 1)
if (breakerId !== undefined) {
breakerCount.set(breakerId, (breakerCount.get(breakerId) || 0) + 1)
}
chainLengthCount.set(chainLength, (chainLengthCount.get(chainLength) || 0) + 1)
const content = chain[0].content
const chainTs = chain[0].ts
const existing = contentStats.get(content)
if (existing) {
existing.count++
existing.lastTs = Math.max(existing.lastTs, chainTs)
if (chainLength > existing.maxChainLength) {
existing.maxChainLength = chainLength
existing.originatorId = originatorId
}
} else {
contentStats.set(content, { count: 1, maxChainLength: chainLength, originatorId, lastTs: chainTs })
}
// 计算反应时间 (Fastest Follower)
// 从第二个消息开始,计算与前一条消息的时间差
for (let i = 1; i < chain.length; i++) {
const currentMsg = chain[i]
const prevMsg = chain[i - 1]
const diff = (currentMsg.ts - prevMsg.ts) * 1000 // 毫秒
// 只统计 20 秒内的复读,排除间隔过久的"伪复读"
if (diff <= 20 * 1000) {
if (!fastestRepeaterStats.has(currentMsg.senderId)) {
fastestRepeaterStats.set(currentMsg.senderId, { totalDiff: 0, count: 0 })
}
const stats = fastestRepeaterStats.get(currentMsg.senderId)!
stats.totalDiff += diff
stats.count++
}
}
}
for (const msg of messages) {
if (!memberInfo.has(msg.senderId)) {
memberInfo.set(msg.senderId, { platformId: msg.platformId, name: msg.name })
}
memberMessageCount.set(msg.senderId, (memberMessageCount.get(msg.senderId) || 0) + 1)
const content = msg.content.trim()
if (content === currentContent) {
const lastSender = repeatChain[repeatChain.length - 1]?.senderId
if (lastSender !== msg.senderId) {
repeatChain.push({ senderId: msg.senderId, content, ts: msg.ts })
}
} else {
processRepeatChain(repeatChain, msg.senderId)
currentContent = content
repeatChain = [{ senderId: msg.senderId, content, ts: msg.ts }]
}
}
processRepeatChain(repeatChain)
const buildRankList = (countMap: Map<number, number>, total: number): any[] => {
const items: any[] = []
for (const [memberId, count] of countMap.entries()) {
const info = memberInfo.get(memberId)
if (info) {
items.push({
memberId,
platformId: info.platformId,
name: info.name,
count,
percentage: total > 0 ? Math.round((count / total) * 10000) / 100 : 0,
})
}
}
return items.sort((a, b) => b.count - a.count)
}
const buildRateList = (countMap: Map<number, number>): any[] => {
const items: any[] = []
for (const [memberId, count] of countMap.entries()) {
const info = memberInfo.get(memberId)
const totalMessages = memberMessageCount.get(memberId) || 0
if (info && totalMessages > 0) {
items.push({
memberId,
platformId: info.platformId,
name: info.name,
count,
totalMessages,
rate: Math.round((count / totalMessages) * 10000) / 100,
})
}
}
return items.sort((a, b) => b.rate - a.rate)
}
const buildFastestList = (): any[] => {
const items: any[] = []
for (const [memberId, stats] of fastestRepeaterStats.entries()) {
// 过滤掉偶尔复读的人至少参与5次复读才统计避免数据偏差
if (stats.count < 5) continue
const info = memberInfo.get(memberId)
if (info) {
items.push({
memberId,
platformId: info.platformId,
name: info.name,
count: stats.count,
avgTimeDiff: Math.round(stats.totalDiff / stats.count),
})
}
}
return items.sort((a, b) => a.avgTimeDiff - b.avgTimeDiff) // 越快越好
}
const chainLengthDistribution: any[] = []
for (const [length, count] of chainLengthCount.entries()) {
chainLengthDistribution.push({ length, count })
}
chainLengthDistribution.sort((a, b) => a.length - b.length)
const hotContents: any[] = []
for (const [content, stats] of contentStats.entries()) {
const originatorInfo = memberInfo.get(stats.originatorId)
hotContents.push({
content,
count: stats.count,
maxChainLength: stats.maxChainLength,
originatorName: originatorInfo?.name || '未知',
lastTs: stats.lastTs,
})
}
hotContents.sort((a, b) => b.maxChainLength - a.maxChainLength)
const top50HotContents = hotContents.slice(0, 50)
return {
originators: buildRankList(originatorCount, totalRepeatChains),
initiators: buildRankList(initiatorCount, totalRepeatChains),
breakers: buildRankList(breakerCount, totalRepeatChains),
fastestRepeaters: buildFastestList(),
originatorRates: buildRateList(originatorCount),
initiatorRates: buildRateList(initiatorCount),
breakerRates: buildRateList(breakerCount),
chainLengthDistribution,
hotContents: top50HotContents,
avgChainLength: totalRepeatChains > 0 ? Math.round((totalChainLength / totalRepeatChains) * 100) / 100 : 0,
totalRepeatChains,
}
}
// ==================== 口头禅分析 ====================
/**
* 获取口头禅分析数据
*/
export function getCatchphraseAnalysis(sessionId: string, filter?: TimeFilter): any {
const db = openDatabase(sessionId)
if (!db) return { members: [] }
const { clause, params } = buildTimeFilter(filter)
let whereClause = clause
if (whereClause.includes('WHERE')) {
whereClause +=
" AND COALESCE(m.account_name, '') != '系统消息' AND msg.type = 0 AND msg.content IS NOT NULL AND LENGTH(TRIM(msg.content)) >= 2"
} else {
whereClause =
" WHERE COALESCE(m.account_name, '') != '系统消息' AND msg.type = 0 AND msg.content IS NOT NULL AND LENGTH(TRIM(msg.content)) >= 2"
}
const rows = db
.prepare(
`
SELECT
m.id as memberId,
m.platform_id as platformId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as name,
TRIM(msg.content) as content,
COUNT(*) as count
FROM message msg
JOIN member m ON msg.sender_id = m.id
${whereClause}
GROUP BY m.id, TRIM(msg.content)
ORDER BY m.id, count DESC
`
)
.all(...params) as Array<{
memberId: number
platformId: string
name: string
content: string
count: number
}>
const memberMap = new Map<
number,
{
memberId: number
platformId: string
name: string
catchphrases: Array<{ content: string; count: number }>
}
>()
for (const row of rows) {
if (!memberMap.has(row.memberId)) {
memberMap.set(row.memberId, {
memberId: row.memberId,
platformId: row.platformId,
name: row.name,
catchphrases: [],
})
}
const member = memberMap.get(row.memberId)!
if (member.catchphrases.length < 5) {
member.catchphrases.push({
content: row.content,
count: row.count,
})
}
}
const members = Array.from(memberMap.values())
members.sort((a, b) => {
const aTotal = a.catchphrases.reduce((sum, c) => sum + c.count, 0)
const bTotal = b.catchphrases.reduce((sum, c) => sum + c.count, 0)
return bTotal - aTotal
})
return { members }
}