feat: 新增复读分析

This commit is contained in:
digua
2025-11-27 21:58:01 +08:00
parent 84bfab0b43
commit 00eea87a11
7 changed files with 577 additions and 3 deletions
+238
View File
@@ -15,6 +15,11 @@ import type {
HourlyActivity,
DailyActivity,
MessageType,
RepeatAnalysis,
RepeatStatItem,
RepeatRateItem,
ChainLengthDistribution,
HotRepeatContent,
} from '../../../src/types/chat'
// 数据库存储目录
@@ -721,3 +726,236 @@ export function getMemberNameHistory(
db.close()
}
}
/**
 * Compute repeat ("复读") statistics for one chat session.
 *
 * Messages are read in chronological order and scanned once. A repeat chain is a
 * run of messages with identical (trimmed) content; a message only extends the
 * chain when its sender differs from the sender of the previous chain entry, so
 * one member sending the same text several times in a row is counted once.
 * A chain is reported when it holds at least 3 entries.
 *
 * Roles recorded per chain:
 * - originator: sender of the first entry
 * - initiator:  sender of the second entry
 * - breaker:    sender of the first following message with different content
 *               (the final chain of the scan has no breaker)
 *
 * Only text messages (type 0) with non-empty content whose member name is not
 * '系统消息' are considered; everything else is invisible to the scan.
 *
 * @param sessionId Session whose database is opened via `openDatabase`.
 * @param filter Optional time filter, translated to SQL by `buildTimeFilter`.
 * @returns The aggregated analysis; an all-empty result when the database cannot be opened.
 */
export function getRepeatAnalysis(sessionId: string, filter?: TimeFilter): RepeatAnalysis {
  const db = openDatabase(sessionId)
  const emptyResult: RepeatAnalysis = {
    originators: [],
    initiators: [],
    breakers: [],
    originatorRates: [],
    initiatorRates: [],
    breakerRates: [],
    chainLengthDistribution: [],
    hotContents: [],
    avgChainLength: 0,
    totalRepeatChains: 0,
  }
  if (!db) {
    return emptyResult
  }

  try {
    const { clause, params } = buildTimeFilter(filter)

    // Exclude system messages, non-text messages and empty content.
    // MessageType: TEXT = 0, IMAGE = 1, SYSTEM = 6
    const contentFilter =
      "m.name != '系统消息' AND msg.type = 0 AND msg.content IS NOT NULL AND TRIM(msg.content) != ''"
    const whereClause = clause.includes('WHERE') ? `${clause} AND ${contentFilter}` : ` WHERE ${contentFilter}`

    // All eligible messages in chronological order (id breaks timestamp ties).
    const messages = db
      .prepare(
        `
        SELECT
          msg.id,
          msg.sender_id as senderId,
          msg.content,
          m.platform_id as platformId,
          m.name
        FROM message msg
        JOIN member m ON msg.sender_id = m.id
        ${whereClause}
        ORDER BY msg.ts ASC, msg.id ASC
      `
      )
      .all(...params) as Array<{
      id: number
      senderId: number
      content: string
      platformId: string
      name: string
    }>

    // Adds 1 to the counter stored under `key`.
    const increment = (counter: Map<number, number>, key: number) => {
      counter.set(key, (counter.get(key) || 0) + 1)
    }

    // Per-member role counters.
    const originatorCount = new Map<number, number>()
    const initiatorCount = new Map<number, number>()
    const breakerCount = new Map<number, number>()
    // Number of scanned messages per member (denominator of the rate lists).
    const memberMessageCount = new Map<number, number>()
    // Member display info, captured from the first scanned message of each member.
    const memberInfo = new Map<number, { platformId: string; name: string }>()
    // chain length -> number of chains with that length
    const chainLengthCount = new Map<number, number>()
    // content -> how many chains used it, plus length/originator of its longest chain
    const contentStats = new Map<string, { count: number; maxChainLength: number; originatorId: number }>()

    let currentContent: string | null = null
    let repeatChain: Array<{ senderId: number; content: string }> = []
    let totalRepeatChains = 0
    let totalChainLength = 0 // numerator of the average chain length

    // Records a finished chain; chains with fewer than 3 entries are not repeats.
    const processRepeatChain = (chain: Array<{ senderId: number; content: string }>, breakerId?: number) => {
      if (chain.length < 3) return

      totalRepeatChains++
      const chainLength = chain.length
      totalChainLength += chainLength

      const originatorId = chain[0].senderId
      increment(originatorCount, originatorId)
      increment(initiatorCount, chain[1].senderId)
      if (breakerId !== undefined) {
        increment(breakerCount, breakerId)
      }
      increment(chainLengthCount, chainLength)

      const content = chain[0].content
      const existing = contentStats.get(content)
      if (existing) {
        existing.count++
        // Keep the originator of the longest chain seen for this content.
        if (chainLength > existing.maxChainLength) {
          existing.maxChainLength = chainLength
          existing.originatorId = originatorId
        }
      } else {
        contentStats.set(content, { count: 1, maxChainLength: chainLength, originatorId })
      }
    }

    for (const msg of messages) {
      const content = msg.content.trim()
      // SQL TRIM() strips spaces only, so content made of tabs/newlines survives the
      // query but is empty here. Treat it like any other excluded message: it is not
      // counted and neither extends nor breaks a chain.
      if (content === '') continue

      if (!memberInfo.has(msg.senderId)) {
        memberInfo.set(msg.senderId, { platformId: msg.platformId, name: msg.name })
      }
      increment(memberMessageCount, msg.senderId)

      if (content === currentContent) {
        const lastSender = repeatChain[repeatChain.length - 1]?.senderId
        // Same content from a different sender extends the chain; the same sender
        // repeating themselves is ignored.
        if (lastSender !== msg.senderId) {
          repeatChain.push({ senderId: msg.senderId, content })
        }
      } else {
        // Different content closes the current chain; this sender is the breaker.
        processRepeatChain(repeatChain, msg.senderId)
        currentContent = content
        repeatChain = [{ senderId: msg.senderId, content }]
      }
    }
    // The chain still open at the end of the scan has no breaker.
    processRepeatChain(repeatChain)

    // Ranking by absolute count; `percentage` is the share of `total`, 2 decimals.
    const buildRankList = (countMap: Map<number, number>, total: number): RepeatStatItem[] => {
      const items: RepeatStatItem[] = []
      for (const [memberId, count] of countMap.entries()) {
        const info = memberInfo.get(memberId)
        if (info) {
          items.push({
            memberId,
            platformId: info.platformId,
            name: info.name,
            count,
            percentage: total > 0 ? Math.round((count / total) * 10000) / 100 : 0,
          })
        }
      }
      return items.sort((a, b) => b.count - a.count)
    }

    // Ranking by rate = count / scanned messages of that member, in percent, 2 decimals.
    const buildRateList = (countMap: Map<number, number>): RepeatRateItem[] => {
      const items: RepeatRateItem[] = []
      for (const [memberId, count] of countMap.entries()) {
        const info = memberInfo.get(memberId)
        const totalMessages = memberMessageCount.get(memberId) || 0
        if (info && totalMessages > 0) {
          items.push({
            memberId,
            platformId: info.platformId,
            name: info.name,
            count,
            totalMessages,
            rate: Math.round((count / totalMessages) * 10000) / 100,
          })
        }
      }
      return items.sort((a, b) => b.rate - a.rate)
    }

    // Chain length distribution, ascending by length.
    const chainLengthDistribution: ChainLengthDistribution[] = []
    for (const [length, count] of chainLengthCount.entries()) {
      chainLengthDistribution.push({ length, count })
    }
    chainLengthDistribution.sort((a, b) => a.length - b.length)

    // Repeated contents ordered by their longest chain, truncated to the top 10.
    const hotContents: HotRepeatContent[] = []
    for (const [content, stats] of contentStats.entries()) {
      const originatorInfo = memberInfo.get(stats.originatorId)
      hotContents.push({
        content,
        count: stats.count,
        maxChainLength: stats.maxChainLength,
        originatorName: originatorInfo?.name || '未知',
      })
    }
    hotContents.sort((a, b) => b.maxChainLength - a.maxChainLength)
    const top10HotContents = hotContents.slice(0, 10)

    return {
      originators: buildRankList(originatorCount, totalRepeatChains),
      initiators: buildRankList(initiatorCount, totalRepeatChains),
      breakers: buildRankList(breakerCount, totalRepeatChains),
      originatorRates: buildRateList(originatorCount),
      initiatorRates: buildRateList(initiatorCount),
      breakerRates: buildRateList(breakerCount),
      chainLengthDistribution,
      hotContents: top10HotContents,
      avgChainLength: totalRepeatChains > 0 ? Math.round((totalChainLength / totalRepeatChains) * 100) / 100 : 0,
      totalRepeatChains,
    }
  } finally {
    db.close()
  }
}
+15
View File
@@ -360,6 +360,21 @@ const mainIpcMain = (win: BrowserWindow) => {
ipcMain.handle('chat:getSupportedFormats', async () => {
return parser.getSupportedFormats()
})
/**
 * IPC handler for 'chat:getRepeatAnalysis': runs the repeat analysis for a session.
 *
 * Failures are logged and answered with an empty analysis instead of rejecting.
 * The fallback carries every field of the analysis result so the renderer can
 * read any of its lists without hitting `undefined`.
 */
ipcMain.handle(
  'chat:getRepeatAnalysis',
  async (_, sessionId: string, filter?: { startTs?: number; endTs?: number }) => {
    try {
      return database.getRepeatAnalysis(sessionId, filter)
    } catch (error) {
      console.error('获取复读分析失败:', error)
      return {
        originators: [],
        initiators: [],
        breakers: [],
        originatorRates: [],
        initiatorRates: [],
        breakerRates: [],
        chainLengthDistribution: [],
        hotContents: [],
        avgChainLength: 0,
        totalRepeatChains: 0,
      }
    }
  }
)
}
export default mainIpcMain
+2
View File
@@ -7,6 +7,7 @@ import type {
DailyActivity,
MessageType,
ImportProgress,
RepeatAnalysis,
} from '../../src/types/chat'
interface TimeFilter {
@@ -33,6 +34,7 @@ interface ChatApi {
getDbDirectory: () => Promise<string | null>
getSupportedFormats: () => Promise<Array<{ name: string; platform: string }>>
onImportProgress: (callback: (progress: ImportProgress) => void) => () => void
getRepeatAnalysis: (sessionId: string, filter?: TimeFilter) => Promise<RepeatAnalysis>
}
interface Api {
+8
View File
@@ -8,6 +8,7 @@ import type {
DailyActivity,
MessageType,
ImportProgress,
RepeatAnalysis,
} from '../../src/types/chat'
// Custom APIs for renderer
@@ -152,6 +153,13 @@ const chatApi = {
ipcRenderer.removeListener('chat:importProgress', handler)
}
},
/**
* Fetch the repeat analysis for a session.
* Forwards `sessionId` and the optional time filter to the
* 'chat:getRepeatAnalysis' IPC channel and resolves with its reply.
*/
getRepeatAnalysis: (sessionId: string, filter?: { startTs?: number; endTs?: number }): Promise<RepeatAnalysis> => {
return ipcRenderer.invoke('chat:getRepeatAnalysis', sessionId, filter)
},
}
// Use `contextBridge` APIs to expose Electron APIs to
+1
View File
@@ -238,6 +238,7 @@ onMounted(loadData)
v-else-if="activeTab === 'members'"
:session-id="currentSessionId!"
:member-activity="memberActivity"
:time-filter="timeFilter"
/>
<TimelineTab
v-else-if="activeTab === 'timeline'"
+241 -3
View File
@@ -1,14 +1,91 @@
<script setup lang="ts">
import { computed, ref, watch } from 'vue'
import type { MemberActivity, MemberNameHistory } from '@/types/chat'
import { MemberRankList } from '@/components/charts'
import type { MemberRankItem } from '@/components/charts'
import type { MemberActivity, MemberNameHistory, RepeatAnalysis } from '@/types/chat'
import { MemberRankList, BarChart } from '@/components/charts'
import type { MemberRankItem, BarChartData } from '@/components/charts'
interface TimeFilter {
startTs?: number
endTs?: number
}
const props = defineProps<{
sessionId: string
memberActivity: MemberActivity[]
timeFilter?: TimeFilter
}>()
// ==================== 复读分析 ====================
const repeatAnalysis = ref<RepeatAnalysis | null>(null)
const isLoadingRepeat = ref(false)
// 复读排行榜显示模式:count(绝对次数)或 rate(复读率)
const repeatRankMode = ref<'count' | 'rate'>('rate')
// Rank-list view models for the three repeat roles (originator, initiator, breaker).
// Each one follows `repeatRankMode`: 'count' shows the absolute ranking with its share
// of all chains, 'rate' shows the per-member rate ranking.

// One row of either ranking; both shapes carry memberId, name and count.
type RepeatRankEntry = RepeatAnalysis['originators'][number] | RepeatAnalysis['originatorRates'][number]

/**
 * Pick the ranking matching the current mode and convert its first 10 rows
 * into rank-list items. `percentage` shows the share for count rows and the
 * rate for rate rows.
 */
function toRepeatRankItems(
  byCount: RepeatAnalysis['originators'],
  byRate: RepeatAnalysis['originatorRates']
): MemberRankItem[] {
  const source: readonly RepeatRankEntry[] = repeatRankMode.value === 'count' ? byCount : byRate
  return source.slice(0, 10).map((entry) => ({
    id: entry.memberId.toString(),
    name: entry.name,
    value: entry.count,
    percentage: 'percentage' in entry ? entry.percentage : entry.rate,
  }))
}

// Members whose messages get repeated.
const originatorRankData = computed<MemberRankItem[]>(() => {
  const analysis = repeatAnalysis.value
  return analysis ? toRepeatRankItems(analysis.originators, analysis.originatorRates) : []
})

// Members who send the second message of a chain.
const initiatorRankData = computed<MemberRankItem[]>(() => {
  const analysis = repeatAnalysis.value
  return analysis ? toRepeatRankItems(analysis.initiators, analysis.initiatorRates) : []
})

// Members who end a chain with a different message.
const breakerRankData = computed<MemberRankItem[]>(() => {
  const analysis = repeatAnalysis.value
  return analysis ? toRepeatRankItems(analysis.breakers, analysis.breakerRates) : []
})
// Bar chart input for the chain length distribution:
// one bar per chain length, labelled with the length, valued with its occurrence count.
const chainLengthChartData = computed<BarChartData>(() => {
  const analysis = repeatAnalysis.value
  if (!analysis) return { labels: [], values: [] }

  const labels: string[] = []
  const values: number[] = []
  for (const bucket of analysis.chainLengthDistribution) {
    labels.push(`${bucket.length}`)
    values.push(bucket.count)
  }
  return { labels, values }
})
// Sequence number of the most recently started repeat-analysis request.
let latestRepeatRequestId = 0

/**
 * Load the repeat analysis for the current session and time filter.
 *
 * Requests may overlap when the session or filter changes quickly. Only the
 * most recently started request is allowed to publish its result or clear the
 * loading flag, so a slow older response cannot overwrite newer data.
 * On failure the previous result is cleared rather than left on screen, since
 * it may belong to a different session or filter.
 */
async function loadRepeatAnalysis() {
  if (!props.sessionId) return

  const requestId = ++latestRepeatRequestId
  isLoadingRepeat.value = true
  try {
    const result = await window.chatApi.getRepeatAnalysis(props.sessionId, props.timeFilter)
    if (requestId === latestRepeatRequestId) {
      repeatAnalysis.value = result
    }
  } catch (error) {
    console.error('加载复读分析失败:', error)
    if (requestId === latestRepeatRequestId) {
      repeatAnalysis.value = null
    }
  } finally {
    // A superseded request must not end the loading state of its successor.
    if (requestId === latestRepeatRequestId) {
      isLoadingRepeat.value = false
    }
  }
}
/**
 * Shorten long repeat content for display.
 *
 * Length is measured in Unicode code points rather than UTF-16 code units, so
 * an emoji or other astral character is never cut in half.
 *
 * @param content Text to shorten.
 * @param maxLength Maximum number of characters kept before the ellipsis.
 * @returns `content` unchanged when it fits, otherwise its first `maxLength`
 *          characters followed by '...'.
 */
function truncateContent(content: string, maxLength = 30): string {
  // A string never has more code points than code units, so this is a safe fast path.
  if (content.length <= maxLength) return content

  const codePoints = Array.from(content)
  if (codePoints.length <= maxLength) return content
  return codePoints.slice(0, maxLength).join('') + '...'
}
// Top 10 排行榜数据
const top10RankData = computed<MemberRankItem[]>(() => {
return props.memberActivity.slice(0, 10).map((m) => ({
@@ -85,6 +162,15 @@ watch(
{ immediate: true }
)
// Reload the repeat analysis on mount and whenever the session id or the
// time filter (including its nested fields) changes.
watch(
  () => [props.sessionId, props.timeFilter],
  () => void loadRepeatAnalysis(),
  { immediate: true, deep: true }
)
// 格式化时间段(用于横向展示)
function formatPeriod(startTs: number, endTs: number | null): string {
const formatDate = (ts: number) => {
@@ -192,5 +278,157 @@ function formatPeriod(startTs: number, endTs: number | null): string {
<div v-else class="px-5 py-8 text-center text-sm text-gray-400">正在加载昵称变更记录...</div>
</div>
<!-- 复读分析模块 -->
<div class="rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900">
<div class="flex items-center justify-between border-b border-gray-200 px-5 py-4 dark:border-gray-800">
<div>
<h3 class="font-semibold text-gray-900 dark:text-white">复读分析</h3>
<p class="mt-1 text-sm text-gray-500 dark:text-gray-400">
{{
isLoadingRepeat
? '加载中...'
: repeatAnalysis
? `共检测到 ${repeatAnalysis.totalRepeatChains} 次复读,平均复读链长度 ${repeatAnalysis.avgChainLength}`
: '暂无复读数据'
}}
</p>
</div>
<!-- 排序切换按钮 -->
<div v-if="repeatAnalysis && repeatAnalysis.totalRepeatChains > 0" class="flex gap-1">
<UButton
size="xs"
:variant="repeatRankMode === 'rate' ? 'solid' : 'ghost'"
:color="repeatRankMode === 'rate' ? 'primary' : 'gray'"
@click="repeatRankMode = 'rate'"
>
按复读率
</UButton>
<UButton
size="xs"
:variant="repeatRankMode === 'count' ? 'solid' : 'ghost'"
:color="repeatRankMode === 'count' ? 'primary' : 'gray'"
@click="repeatRankMode = 'count'"
>
按次数
</UButton>
</div>
</div>
<div v-if="isLoadingRepeat" class="px-5 py-8 text-center text-sm text-gray-400">正在分析复读数据...</div>
<div v-else-if="repeatAnalysis && repeatAnalysis.totalRepeatChains > 0" class="space-y-6 p-5">
<!-- 复读链长度分布 & 最火复读内容 -->
<div class="grid grid-cols-1 gap-6 lg:grid-cols-2">
<!-- 复读链长度分布 -->
<div class="rounded-lg border border-gray-100 bg-gray-50/50 dark:border-gray-800 dark:bg-gray-800/50">
<div class="border-b border-gray-100 px-4 py-3 dark:border-gray-800">
<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300">📊 复读链长度分布</h4>
<p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">每次复读有多少人参与</p>
</div>
<div class="p-4">
<BarChart
v-if="chainLengthChartData.labels.length > 0"
:data="chainLengthChartData"
:height="200"
/>
<div v-else class="py-6 text-center text-sm text-gray-400">暂无数据</div>
</div>
</div>
<!-- 最长复读链 TOP 10 -->
<div class="rounded-lg border border-gray-100 bg-gray-50/50 dark:border-gray-800 dark:bg-gray-800/50">
<div class="border-b border-gray-100 px-4 py-3 dark:border-gray-800">
<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300">🏆 最长复读链 TOP 10</h4>
<p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">单次复读参与人数最多的内容</p>
</div>
<div v-if="repeatAnalysis.hotContents.length > 0" class="divide-y divide-gray-100 dark:divide-gray-800">
<div
v-for="(item, index) in repeatAnalysis.hotContents"
:key="index"
class="flex items-center gap-3 px-4 py-3"
>
<span
class="flex h-6 w-6 shrink-0 items-center justify-center rounded-full text-xs font-bold"
:class="
index === 0
? 'bg-amber-100 text-amber-600 dark:bg-amber-900/30 dark:text-amber-400'
: index === 1
? 'bg-gray-200 text-gray-600 dark:bg-gray-700 dark:text-gray-400'
: index === 2
? 'bg-orange-100 text-orange-600 dark:bg-orange-900/30 dark:text-orange-400'
: 'bg-gray-100 text-gray-500 dark:bg-gray-800 dark:text-gray-500'
"
>
{{ index + 1 }}
</span>
<span class="shrink-0 text-lg font-bold text-[#de335e]">{{ item.maxChainLength }}</span>
<div class="flex flex-1 items-center gap-1 overflow-hidden text-sm">
<span class="shrink-0 font-medium text-gray-900 dark:text-white">{{ item.originatorName }}</span>
<span class="truncate text-gray-600 dark:text-gray-400" :title="item.content">
{{ truncateContent(item.content) }}
</span>
</div>
<span class="shrink-0 text-xs text-gray-500">{{ item.count }} </span>
</div>
</div>
<div v-else class="px-4 py-6 text-center text-sm text-gray-400">暂无数据</div>
</div>
</div>
<!-- 最容易产生复读原创者 -->
<div class="rounded-lg border border-gray-100 bg-gray-50/50 dark:border-gray-800 dark:bg-gray-800/50">
<div class="border-b border-gray-100 px-4 py-3 dark:border-gray-800">
<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300">🎯 谁的聊天最容易产生复读</h4>
<p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">
{{ repeatRankMode === 'rate' ? '被复读次数 / 总发言数' : '发出的消息被别人复读的次数' }}
</p>
</div>
<MemberRankList
v-if="originatorRankData.length > 0"
:members="originatorRankData"
:session-id="sessionId"
:clickable="true"
/>
<div v-else class="px-4 py-6 text-center text-sm text-gray-400">暂无数据</div>
</div>
<!-- 最喜欢挑起复读挑起者 -->
<div class="rounded-lg border border-gray-100 bg-gray-50/50 dark:border-gray-800 dark:bg-gray-800/50">
<div class="border-b border-gray-100 px-4 py-3 dark:border-gray-800">
<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300">🔥 谁最喜欢挑起复读</h4>
<p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">
{{ repeatRankMode === 'rate' ? '挑起复读次数 / 总发言数' : '第二个发送相同消息、带起节奏的人' }}
</p>
</div>
<MemberRankList
v-if="initiatorRankData.length > 0"
:members="initiatorRankData"
:session-id="sessionId"
:clickable="true"
/>
<div v-else class="px-4 py-6 text-center text-sm text-gray-400">暂无数据</div>
</div>
<!-- 最喜欢打断复读终结者 -->
<div class="rounded-lg border border-gray-100 bg-gray-50/50 dark:border-gray-800 dark:bg-gray-800/50">
<div class="border-b border-gray-100 px-4 py-3 dark:border-gray-800">
<h4 class="text-sm font-medium text-gray-700 dark:text-gray-300"> 谁喜欢打断复读</h4>
<p class="mt-0.5 text-xs text-gray-500 dark:text-gray-400">
{{ repeatRankMode === 'rate' ? '打断复读次数 / 总发言数' : '终结复读链的人' }}
</p>
</div>
<MemberRankList
v-if="breakerRankData.length > 0"
:members="breakerRankData"
:session-id="sessionId"
:clickable="true"
/>
<div v-else class="px-4 py-6 text-center text-sm text-gray-400">暂无数据</div>
</div>
</div>
<div v-else class="px-5 py-8 text-center text-sm text-gray-400">该群组暂无复读记录</div>
</div>
</div>
</template>
+72
View File
@@ -175,3 +175,75 @@ export interface ImportResult {
sessionId?: string // 成功时返回会话ID
error?: string // 失败时返回错误信息
}
// ==================== 复读分析类型 ====================
/**
* Repeat statistics for a single member, ranked by absolute count.
*/
export interface RepeatStatItem {
memberId: number
platformId: string
name: string
count: number // number of repeat chains in which the member held the ranked role
percentage: number // count as a share of all repeat chains, in percent
}
/**
* Repeat statistics for a single member, ranked by relative rate.
*/
export interface RepeatRateItem {
memberId: number
platformId: string
name: string
count: number // number of repeat chains in which the member held the ranked role
totalMessages: number // number of the member's messages included in the analysis
rate: number // count / totalMessages, in percent
}
/**
* One bucket of the repeat chain length distribution.
*/
export interface ChainLengthDistribution {
length: number // repeat chain length (number of messages counted in the chain)
count: number // number of chains with this length
}
/**
* A piece of content that was repeated, with the statistics of its chains.
*/
export interface HotRepeatContent {
content: string // the repeated text
count: number // number of repeat chains formed with this content
maxChainLength: number // length of the longest chain formed with this content
originatorName: string // name of the member who started that longest chain
}
/**
* Result of the repeat analysis for one session.
*/
export interface RepeatAnalysis {
/** Members whose messages get repeated (originators), by absolute count */
originators: RepeatStatItem[]
/** Members who send the second message of a chain (initiators), by absolute count */
initiators: RepeatStatItem[]
/** Members who end a chain with a different message (breakers), by absolute count */
breakers: RepeatStatItem[]
/** Originators ranked by rate (relative to their message count) */
originatorRates: RepeatRateItem[]
/** Initiators ranked by rate (relative to their message count) */
initiatorRates: RepeatRateItem[]
/** Breakers ranked by rate (relative to their message count) */
breakerRates: RepeatRateItem[]
/** Distribution of repeat chain lengths */
chainLengthDistribution: ChainLengthDistribution[]
/** Repeated contents ordered by their longest chain, top 10 */
hotContents: HotRepeatContent[]
/** Average repeat chain length */
avgChainLength: number
/** Total number of repeat chains */
totalRepeatChains: number
}