feat: 互动频率分析

This commit is contained in:
digua
2026-02-10 23:36:03 +08:00
parent 2d6c4d085a
commit 448f28da14
12 changed files with 1151 additions and 3 deletions
+11 -3
View File
@@ -12,6 +12,14 @@ export { getNightOwlAnalysis, getDragonKingAnalysis, getDivingAnalysis, getCheck
// 行为分析:斗图
export { getMemeBattleAnalysis } from './behavior'
// 社交分析:@ 互动、含笑量
export { getMentionAnalysis, getMentionGraph, getLaughAnalysis } from './social'
export type { MentionGraphData, MentionGraphNode, MentionGraphLink } from './social'
// 社交分析:@ 互动、含笑量、小团体
export { getMentionAnalysis, getMentionGraph, getLaughAnalysis, getClusterGraph } from './social'
export type {
MentionGraphData,
MentionGraphNode,
MentionGraphLink,
ClusterGraphData,
ClusterGraphNode,
ClusterGraphLink,
ClusterGraphOptions,
} from './social'
@@ -670,3 +670,343 @@ export function getLaughAnalysis(sessionId: string, filter?: TimeFilter, keyword
groupLaughRate: Math.round((totalLaughs / totalMessages) * 10000) / 100,
}
}
// ==================== 小团体关系图(时间相邻共现) ====================
/**
 * Tuning options for the cluster (small-group) relationship graph.
 * Every field is optional; getClusterGraph fills omitted fields from
 * DEFAULT_CLUSTER_OPTIONS.
 */
export interface ClusterGraphOptions {
/** Number of distinct subsequent speakers to look ahead for each message (default 3). */
lookAhead?: number
/** Time-decay constant, in seconds (default 120). */
decaySeconds?: number
/** Maximum number of edges to keep (default 100). */
topEdges?: number
}
/**
 * A node (one member) of the cluster graph.
 */
export interface ClusterGraphNode {
/** Member id (the sender id of the member's messages). */
id: number
/** Display name; getClusterGraph appends `#` plus a short id suffix when several nodes share a name. */
name: string
/** Message count of the member as reported by the member query. */
messageCount: number
/** Rendered node size, derived from normalized degree and message count. */
symbolSize: number
/** Sum of the edge scores of all kept edges touching this node. */
degree: number
/** `degree` divided by the largest degree in the graph. */
normalizedDegree: number
}
/**
 * An undirected edge of the cluster graph between two members.
 */
export interface ClusterGraphLink {
/** Display name of one endpoint (matches ClusterGraphNode.name). */
source: string
/** Display name of the other endpoint (matches ClusterGraphNode.name). */
target: string
/** Primary edge strength; getClusterGraph stores the hybrid score here. */
value: number
/** Accumulated decay-weighted adjacency score before normalization. */
rawScore: number
/** Score expected from the two members' message volumes alone. */
expectedScore: number
/** Number of times the two members were counted as adjacent. */
coOccurrenceCount: number
}
/**
 * Result of the cluster graph computation.
 */
export interface ClusterGraphData {
/** Members that appear in at least one kept edge. */
nodes: ClusterGraphNode[]
/** Kept edges. */
links: ClusterGraphLink[]
/** Largest `value` among `links` (0 when there are no links). */
maxLinkValue: number
/** Detected communities; getClusterGraph currently always returns an empty array here. */
communities: Array<{ id: number; name: string; size: number }>
/** Summary counters for the analysis. */
stats: {
/** Number of members returned by the member query. */
totalMembers: number
/** Number of messages that entered the analysis. */
totalMessages: number
/** Number of members that appear in `nodes`. */
involvedMembers: number
/** Number of edges in `links`. */
edgeCount: number
/** Number of communities; getClusterGraph currently always reports 0. */
communityCount: number
}
}
/** Default values that getClusterGraph merges with the caller-supplied ClusterGraphOptions. */
const DEFAULT_CLUSTER_OPTIONS = {
// Distinct subsequent speakers examined per message.
lookAhead: 3,
// Time-decay constant, in seconds.
decaySeconds: 120,
// Maximum number of edges kept in the result.
topEdges: 100,
}
/**
 * Rounds a number to a fixed count of decimal places.
 *
 * @param value - Number to round.
 * @param digits - Decimal places to keep (defaults to 4).
 * @returns `value` scaled by 10^digits, rounded with `Math.round`, then scaled back.
 */
function roundNum(value: number, digits = 4): number {
  const scale = 10 ** digits
  const scaledAndRounded = Math.round(scale * value)
  return scaledAndRounded / scale
}
/**
 * Builds an order-independent map key for a pair of member ids.
 *
 * @param aId - One member id.
 * @param bId - The other member id.
 * @returns The two ids joined by `-`, smaller id first.
 */
function clusterPairKey(aId: number, bId: number): string {
  if (aId < bId) {
    return [aId, bId].join('-')
  }
  return [bId, aId].join('-')
}
/**
 * Builds the cluster (small-group) relationship graph of a session from
 * time-adjacent co-occurrence of speakers.
 *
 * Algorithm:
 * 1. Adjacency: for every message, the next `lookAhead` distinct other speakers
 *    are treated as adjacent to the message's sender.
 * 2. Time decay: a neighbour that shows up sooner weighs more,
 *    `exp(-delta / decaySeconds)`.
 * 3. Position decay: the 1st neighbour weighs 1, the 2nd 0.8, the 3rd 0.6, ...
 *    (never below 0).
 * 4. Normalization: `rawScore / expectedScore`, which reduces the bias towards
 *    members who simply talk a lot.
 * 5. Ranking: edges are ranked by a hybrid of the raw and the normalized score
 *    (each scaled by its maximum, 50% / 50%) and only the top `topEdges` are kept.
 *
 * No community detection is performed: `communities` is always empty and
 * `stats.communityCount` is always 0 (the fields are kept for compatibility).
 *
 * @param sessionId - Session whose database is analysed.
 * @param filter - Optional time filter applied to the messages.
 * @param options - Optional tuning parameters; missing or `undefined` fields use
 *   DEFAULT_CLUSTER_OPTIONS. A non-positive `decaySeconds` falls back to the
 *   default and a negative `topEdges` is treated as 0.
 * @returns Nodes, links and summary statistics. An empty graph is returned when
 *   the database cannot be opened, or when there are fewer than two members,
 *   fewer than two messages, or no edges.
 */
export function getClusterGraph(
  sessionId: string,
  filter?: TimeFilter,
  options?: ClusterGraphOptions
): ClusterGraphData {
  const db = openDatabase(sessionId)

  // Resolve options field by field: object spread would let an explicitly
  // passed `undefined` overwrite the default and silently disable the analysis.
  const lookAhead = options?.lookAhead ?? DEFAULT_CLUSTER_OPTIONS.lookAhead
  const requestedDecay = options?.decaySeconds ?? DEFAULT_CLUSTER_OPTIONS.decaySeconds
  // A zero, negative or NaN decay constant would produce NaN or growing weights.
  const decaySeconds = requestedDecay > 0 ? requestedDecay : DEFAULT_CLUSTER_OPTIONS.decaySeconds
  // A negative count would make `slice` drop edges from the tail instead of limiting them.
  const topEdges = Math.max(0, options?.topEdges ?? DEFAULT_CLUSTER_OPTIONS.topEdges)

  // Empty graph carrying only the counters known at the point of return.
  const buildEmptyResult = (totalMembers: number, totalMessages: number): ClusterGraphData => ({
    nodes: [],
    links: [],
    maxLinkValue: 0,
    communities: [],
    stats: {
      totalMembers,
      totalMessages,
      involvedMembers: 0,
      edgeCount: 0,
      communityCount: 0,
    },
  })

  if (!db) return buildEmptyResult(0, 0)

  // 1. Load all members (excluding the system-message pseudo member).
  // NOTE(review): messageCount here counts all of a member's messages and is not
  // restricted by `filter`, unlike the per-member counts used for normalization
  // below. Confirm whether node.messageCount should follow the time filter.
  const members = db
    .prepare(
      `
        SELECT
          id,
          platform_id as platformId,
          COALESCE(group_nickname, account_name, platform_id) as name,
          (SELECT COUNT(*) FROM message WHERE sender_id = member.id) as messageCount
        FROM member
        WHERE COALESCE(account_name, '') != '系统消息'
      `
    )
    .all() as Array<{ id: number; platformId: string; name: string; messageCount: number }>

  if (members.length < 2) return buildEmptyResult(members.length, 0)

  const memberInfo = new Map<number, { name: string; platformId: string; messageCount: number }>()
  for (const m of members) {
    memberInfo.set(m.id, { name: m.name, platformId: m.platformId, messageCount: m.messageCount })
  }

  // 2. Load the messages in chronological order, honouring the time filter.
  const { clause, params } = buildTimeFilter(filter)
  let whereClause = clause
  if (whereClause.includes('WHERE')) {
    whereClause += " AND COALESCE(m.account_name, '') != '系统消息'"
  } else {
    whereClause = " WHERE COALESCE(m.account_name, '') != '系统消息'"
  }

  const messages = db
    .prepare(
      `
        SELECT msg.sender_id as senderId, msg.ts as ts
        FROM message msg
        JOIN member m ON msg.sender_id = m.id
        ${whereClause}
        ORDER BY msg.ts ASC, msg.id ASC
      `
    )
    .all(...params) as Array<{ senderId: number; ts: number }>

  if (messages.length < 2) return buildEmptyResult(members.length, messages.length)

  // 3. Per-member message counts within the filtered range (used for normalization).
  const memberMsgCount = new Map<number, number>()
  for (const msg of messages) {
    memberMsgCount.set(msg.senderId, (memberMsgCount.get(msg.senderId) ?? 0) + 1)
  }
  const totalMessages = messages.length

  // 4. Accumulate the raw adjacency score and co-occurrence count per member pair.
  //    The ids are stored with the scores so the string key never has to be parsed back.
  const pairStats = new Map<
    string,
    { sourceId: number; targetId: number; rawScore: number; coOccurrenceCount: number }
  >()

  // An anchor can never have more partners than there are other speakers. Capping
  // the search keeps the inner scan from running to the end of the list for every
  // message when fewer than `lookAhead` other speakers exist; results are unchanged.
  const maxPartners = Math.min(lookAhead, memberMsgCount.size - 1)

  for (let i = 0; i < messages.length - 1; i++) {
    const anchor = messages[i]
    const seenPartners = new Set<number>()

    // Look ahead until enough distinct other speakers have been found.
    for (let j = i + 1; j < messages.length && seenPartners.size < maxPartners; j++) {
      const candidate = messages[j]
      // Skip the anchor's own messages and speakers already counted for this anchor.
      if (candidate.senderId === anchor.senderId) continue
      if (seenPartners.has(candidate.senderId)) continue
      seenPartners.add(candidate.senderId)
      const partnerRank = seenPartners.size

      // Time decay.
      // NOTE(review): dividing by 1000 assumes msg.ts is in milliseconds; confirm
      // against the message schema (if ts is in seconds this division is wrong).
      const deltaSeconds = (candidate.ts - anchor.ts) / 1000
      const decayWeight = Math.exp(-deltaSeconds / decaySeconds)
      // Position decay: 1, 0.8, 0.6, ... floored at 0 so that a large lookAhead
      // cannot yield negative weights that subtract from a pair's score.
      const positionWeight = Math.max(0, 1 - (partnerRank - 1) * 0.2)
      const weight = decayWeight * positionWeight

      const key = clusterPairKey(anchor.senderId, candidate.senderId)
      const pair = pairStats.get(key)
      if (pair) {
        pair.rawScore += weight
        pair.coOccurrenceCount += 1
      } else {
        pairStats.set(key, {
          sourceId: Math.min(anchor.senderId, candidate.senderId),
          targetId: Math.max(anchor.senderId, candidate.senderId),
          rawScore: weight,
          coOccurrenceCount: 1,
        })
      }
    }
  }

  // 5. Normalize: expected ≈ (count(A) × count(B)) / totalMessages × lookAhead factor.
  const lookAheadFactor = lookAhead * 0.8 // rough average number of neighbours covered per message

  const rawEdges: Array<{
    sourceId: number
    targetId: number
    rawScore: number
    expectedScore: number
    normalizedScore: number
    coOccurrenceCount: number
  }> = []
  // Maxima are tracked in the loop (with a floor of 1) instead of spreading the
  // whole edge list into Math.max, which can exceed the engine's argument limit.
  let maxRawScore = 1
  let maxNormalizedScore = 1

  for (const pair of pairStats.values()) {
    const aMsgCount = memberMsgCount.get(pair.sourceId) ?? 0
    const bMsgCount = memberMsgCount.get(pair.targetId) ?? 0
    const expectedScore = ((aMsgCount * bMsgCount) / totalMessages) * lookAheadFactor
    const normalizedScore = expectedScore > 0 ? pair.rawScore / expectedScore : 0

    if (pair.rawScore > maxRawScore) maxRawScore = pair.rawScore
    if (normalizedScore > maxNormalizedScore) maxNormalizedScore = normalizedScore

    rawEdges.push({
      sourceId: pair.sourceId,
      targetId: pair.targetId,
      rawScore: pair.rawScore,
      expectedScore,
      normalizedScore,
      coOccurrenceCount: pair.coOccurrenceCount,
    })
  }

  // Hybrid score: 50% raw score + 50% normalized score, each scaled by its maximum.
  const edges = rawEdges.map((e) => {
    const hybridScore = 0.5 * (e.rawScore / maxRawScore) + 0.5 * (e.normalizedScore / maxNormalizedScore)
    return {
      ...e,
      rawScore: roundNum(e.rawScore),
      expectedScore: roundNum(e.expectedScore),
      normalizedScore: roundNum(e.normalizedScore),
      hybridScore: roundNum(hybridScore),
    }
  })

  // 6. Sort by hybrid score (descending) and keep the top N edges.
  edges.sort((a, b) => b.hybridScore - a.hybridScore)
  const keptEdges = edges.slice(0, topEdges)

  if (keptEdges.length === 0) return buildEmptyResult(members.length, messages.length)

  // 7. Collect the involved members and their degree (sum of hybrid scores).
  const involvedIds = new Set<number>()
  const nodeDegree = new Map<number, number>()
  let maxLinkValue = 0
  for (const edge of keptEdges) {
    involvedIds.add(edge.sourceId)
    involvedIds.add(edge.targetId)
    nodeDegree.set(edge.sourceId, (nodeDegree.get(edge.sourceId) ?? 0) + edge.hybridScore)
    nodeDegree.set(edge.targetId, (nodeDegree.get(edge.targetId) ?? 0) + edge.hybridScore)
    if (edge.hybridScore > maxLinkValue) maxLinkValue = edge.hybridScore
  }

  let maxDegree = 1
  for (const degree of nodeDegree.values()) {
    if (degree > maxDegree) maxDegree = degree
  }

  // 8. Build unique display names: duplicated names get a `#` + id-suffix tag.
  const nameCount = new Map<string, number>()
  for (const id of involvedIds) {
    const name = memberInfo.get(id)?.name || String(id)
    nameCount.set(name, (nameCount.get(name) ?? 0) + 1)
  }

  const displayNames = new Map<number, string>()
  for (const id of involvedIds) {
    const info = memberInfo.get(id)
    const baseName = info?.name || String(id)
    if ((nameCount.get(baseName) ?? 0) > 1) {
      displayNames.set(id, `${baseName}#${(info?.platformId || String(id)).slice(-4)}`)
    } else {
      displayNames.set(id, baseName)
    }
  }
  const displayNameOf = (id: number): string => displayNames.get(id) ?? String(id)

  // 9. Build the output nodes and links.
  let maxMsgCount = 1
  for (const id of involvedIds) {
    const count = memberInfo.get(id)?.messageCount ?? 0
    if (count > maxMsgCount) maxMsgCount = count
  }

  const nodes: ClusterGraphNode[] = [...involvedIds].map((id) => {
    const messageCount = memberInfo.get(id)?.messageCount ?? 0
    const degree = nodeDegree.get(id) ?? 0
    const normalizedDegree = degree / maxDegree
    const msgNorm = messageCount / maxMsgCount

    // Node size: 70% from degree, 30% from message count, mapped onto 20..55.
    const symbolSize = 20 + (0.7 * normalizedDegree + 0.3 * msgNorm) * 35

    return {
      id,
      name: displayNameOf(id),
      messageCount,
      symbolSize: Math.round(symbolSize),
      degree: roundNum(degree),
      normalizedDegree: roundNum(normalizedDegree),
    }
  })
  nodes.sort((a, b) => b.degree - a.degree)

  const links: ClusterGraphLink[] = keptEdges.map((e) => ({
    source: displayNameOf(e.sourceId),
    target: displayNameOf(e.targetId),
    value: e.hybridScore, // the hybrid score is the primary edge strength
    rawScore: e.rawScore,
    expectedScore: e.expectedScore,
    coOccurrenceCount: e.coOccurrenceCount,
  }))

  return {
    nodes,
    links,
    maxLinkValue: roundNum(maxLinkValue),
    communities: [], // kept for compatibility; communities are no longer computed
    stats: {
      totalMembers: members.length,
      totalMessages: messages.length,
      involvedMembers: involvedIds.size,
      edgeCount: keptEdges.length,
      communityCount: 0,
    },
  }
}