mirror of
https://github.com/hellodigua/ChatLab.git
synced 2026-06-13 19:21:46 +08:00
feat: 互动频率分析
This commit is contained in:
@@ -12,6 +12,14 @@ export { getNightOwlAnalysis, getDragonKingAnalysis, getDivingAnalysis, getCheck
|
||||
// Behavior analysis: meme battles
export { getMemeBattleAnalysis } from './behavior'

// Social analysis: @ mentions, laugh rate, cliques (cluster graph).
// Only the current form of the './social' re-exports is kept; the superseded
// form repeated the same names and would be a duplicate-export error.
export { getMentionAnalysis, getMentionGraph, getLaughAnalysis, getClusterGraph } from './social'
export type {
  MentionGraphData,
  MentionGraphNode,
  MentionGraphLink,
  ClusterGraphData,
  ClusterGraphNode,
  ClusterGraphLink,
  ClusterGraphOptions,
} from './social'
|
||||
|
||||
@@ -670,3 +670,343 @@ export function getLaughAnalysis(sessionId: string, filter?: TimeFilter, keyword
|
||||
groupLaughRate: Math.round((totalLaughs / totalMessages) * 10000) / 100,
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== 小团体关系图(时间相邻共现) ====================
|
||||
|
||||
/**
 * Tuning parameters for the clique (cluster) relationship graph.
 * Every field is optional; omitted fields fall back to the module defaults.
 */
export interface ClusterGraphOptions {
  /** How many distinct later speakers count as neighbours of a message (default 3). */
  lookAhead?: number
  /** Time-decay constant in seconds used in exp(-delta / decaySeconds) (default 120). */
  decaySeconds?: number
  /** Maximum number of edges kept in the result (default 100). */
  topEdges?: number
}
|
||||
|
||||
/**
 * A member rendered as a node of the clique graph.
 */
export interface ClusterGraphNode {
  /** Member row id. */
  id: number
  /** Display name, disambiguated when several members share a nickname. */
  name: string
  /** Number of messages sent by the member. */
  messageCount: number
  /** Rendered node size, derived from degree and message count. */
  symbolSize: number
  /** Sum of the scores of all kept edges touching this node. */
  degree: number
  /** `degree` divided by the largest degree in the graph. */
  normalizedDegree: number
}
|
||||
|
||||
/**
 * An undirected edge of the clique graph between two members.
 */
export interface ClusterGraphLink {
  /** Display name of one endpoint (matches `ClusterGraphNode.name`). */
  source: string
  /** Display name of the other endpoint (matches `ClusterGraphNode.name`). */
  target: string
  /** Primary edge weight used for rendering. */
  value: number
  /** Accumulated, decay-weighted adjacency score before normalization. */
  rawScore: number
  /** Score expected from the two members' message volumes alone. */
  expectedScore: number
  /** Number of times the two members were counted as neighbours. */
  coOccurrenceCount: number
}
|
||||
|
||||
/**
 * Result of the clique graph analysis.
 */
export interface ClusterGraphData {
  /** Members that appear on at least one kept edge. */
  nodes: ClusterGraphNode[]
  /** Kept edges, strongest first. */
  links: ClusterGraphLink[]
  /** Largest `value` among `links` (0 when there are none). */
  maxLinkValue: number
  /** Community list; kept for compatibility, currently always empty. */
  communities: Array<{ id: number; name: string; size: number }>
  /** Summary counters for the analysed range. */
  stats: {
    /** Number of non-system members in the session. */
    totalMembers: number
    /** Number of messages analysed. */
    totalMessages: number
    /** Number of members present in `nodes`. */
    involvedMembers: number
    /** Number of edges present in `links`. */
    edgeCount: number
    /** Number of communities; currently always 0. */
    communityCount: number
  }
}
|
||||
|
||||
/** Defaults applied to every `ClusterGraphOptions` field the caller leaves out. */
const DEFAULT_CLUSTER_OPTIONS = {
  // Number of distinct later speakers treated as neighbours of a message.
  lookAhead: 3,
  // Time-decay constant, in seconds.
  decaySeconds: 120,
  // Maximum number of edges kept in the output.
  topEdges: 100,
}
|
||||
|
||||
/**
 * Rounds `value` to `digits` decimal places (4 by default) using Math.round.
 *
 * @param value  Number to round.
 * @param digits Number of decimal places to keep.
 * @returns The rounded number.
 */
function roundNum(value: number, digits = 4): number {
  const factor = 10 ** digits
  return Math.round(value * factor) / factor
}
|
||||
|
||||
/**
 * Builds an order-independent map key for a pair of member ids: the smaller id
 * always comes first, so (a, b) and (b, a) share one key.
 *
 * @param aId One member id.
 * @param bId The other member id.
 * @returns A key of the form "<smaller>-<larger>".
 */
function clusterPairKey(aId: number, bId: number): string {
  return aId < bId ? `${aId}-${bId}` : `${bId}-${aId}`
}
|
||||
|
||||
/** Lowest weight a neighbour can receive from its position in the look-ahead window. */
const MIN_CLUSTER_POSITION_WEIGHT = 0.2

/**
 * Returns the largest number in `values`, but never less than `floor`.
 * Iterates instead of spreading into Math.max, so very large edge lists cannot
 * exceed the engine's argument-count limit.
 */
function clusterMaxOf(values: Iterable<number>, floor: number): number {
  let max = floor
  for (const value of values) {
    if (value > max) max = value
  }
  return max
}

/**
 * Merges caller options with the defaults. A field is taken from the caller only
 * when it is a usable number (non-negative, and strictly positive for the decay
 * constant); `undefined`, `NaN` and negative values fall back to the default.
 */
function resolveClusterGraphOptions(options?: ClusterGraphOptions): {
  lookAhead: number
  decaySeconds: number
  topEdges: number
} {
  const lookAhead = options?.lookAhead
  const decaySeconds = options?.decaySeconds
  const topEdges = options?.topEdges

  return {
    lookAhead: typeof lookAhead === 'number' && lookAhead >= 0 ? lookAhead : DEFAULT_CLUSTER_OPTIONS.lookAhead,
    decaySeconds:
      typeof decaySeconds === 'number' && decaySeconds > 0 ? decaySeconds : DEFAULT_CLUSTER_OPTIONS.decaySeconds,
    topEdges: typeof topEdges === 'number' && topEdges >= 0 ? topEdges : DEFAULT_CLUSTER_OPTIONS.topEdges,
  }
}

/**
 * Builds the clique relationship graph from temporally adjacent messages.
 *
 * How it works:
 * 1. Adjacency: after a message is sent, the next N distinct speakers are
 *    treated as neighbours of its sender (N = `lookAhead`).
 * 2. Time decay: the sooner a neighbour speaks, the higher the weight
 *    (exp(-delta / decaySeconds)), further scaled by a position weight.
 * 3. Normalization: raw score / expected score, to reduce the bias towards
 *    members who simply talk a lot.
 * 4. The edge value is a 50/50 blend of the max-scaled raw score and the
 *    max-scaled normalized score; the strongest `topEdges` edges are kept.
 *
 * Community detection is not performed: `communities` is always empty and
 * `stats.communityCount` is always 0 (kept for compatibility).
 *
 * @param sessionId Session whose database is analysed.
 * @param filter    Optional time filter applied to the messages.
 * @param options   Optional tuning parameters; see `ClusterGraphOptions`.
 * @returns Graph nodes, links and summary statistics; an empty graph when the
 *          database is unavailable or there is too little data.
 */
export function getClusterGraph(
  sessionId: string,
  filter?: TimeFilter,
  options?: ClusterGraphOptions
): ClusterGraphData {
  const db = openDatabase(sessionId)
  const opts = resolveClusterGraphOptions(options)

  // Empty graph carrying only the counters known at the point of the early return.
  const buildEmptyResult = (totalMembers = 0, totalMessages = 0): ClusterGraphData => ({
    nodes: [],
    links: [],
    maxLinkValue: 0,
    communities: [],
    stats: {
      totalMembers,
      totalMessages,
      involvedMembers: 0,
      edgeCount: 0,
      communityCount: 0,
    },
  })

  if (!db) return buildEmptyResult()

  // 1. Load all members except the system-message pseudo member.
  // NOTE(review): messageCount here counts every message of the member and
  // ignores `filter`, while the scores below only use filtered messages — confirm
  // this is intended.
  const members = db
    .prepare(
      `
        SELECT
          id,
          platform_id as platformId,
          COALESCE(group_nickname, account_name, platform_id) as name,
          (SELECT COUNT(*) FROM message WHERE sender_id = member.id) as messageCount
        FROM member
        WHERE COALESCE(account_name, '') != '系统消息'
      `
    )
    .all() as Array<{ id: number; platformId: string; name: string; messageCount: number }>

  if (members.length < 2) return buildEmptyResult(members.length)

  const memberInfo = new Map<number, { name: string; platformId: string; messageCount: number }>()
  for (const m of members) {
    memberInfo.set(m.id, { name: m.name, platformId: m.platformId, messageCount: m.messageCount })
  }

  // 2. Load messages in chronological order, excluding system messages.
  const { clause, params } = buildTimeFilter(filter)
  const systemFilter = "COALESCE(m.account_name, '') != '系统消息'"
  const whereClause = clause.includes('WHERE') ? `${clause} AND ${systemFilter}` : ` WHERE ${systemFilter}`

  const messages = db
    .prepare(
      `
        SELECT msg.sender_id as senderId, msg.ts as ts
        FROM message msg
        JOIN member m ON msg.sender_id = m.id
        ${whereClause}
        ORDER BY msg.ts ASC, msg.id ASC
      `
    )
    .all(...params) as Array<{ senderId: number; ts: number }>

  if (messages.length < 2) return buildEmptyResult(members.length, messages.length)

  // 3. Per-member message counts within the analysed range (used for normalization).
  const memberMsgCount = new Map<number, number>()
  for (const msg of messages) {
    memberMsgCount.set(msg.senderId, (memberMsgCount.get(msg.senderId) || 0) + 1)
  }

  const totalMessages = messages.length

  // 4. Accumulate the raw adjacency score of every member pair.
  // The ids are stored with the accumulator so they never have to be parsed
  // back out of the string key.
  const pairs = new Map<string, { lowId: number; highId: number; rawScore: number; coOccurrenceCount: number }>()

  // A message can never have more partners than there are other speakers, so
  // stop scanning once that many are found instead of walking to the end.
  const maxPartners = Math.min(opts.lookAhead, memberMsgCount.size - 1)

  for (let i = 0; i < messages.length - 1; i++) {
    const anchor = messages[i]
    const seenPartners = new Set<number>()

    // Look ahead until enough distinct other speakers have been found.
    for (let j = i + 1; j < messages.length && seenPartners.size < maxPartners; j++) {
      const candidate = messages[j]

      // Skip the anchor's own messages and speakers already counted for this anchor.
      if (candidate.senderId === anchor.senderId || seenPartners.has(candidate.senderId)) continue

      seenPartners.add(candidate.senderId)
      const position = seenPartners.size

      // Time decay.
      // NOTE(review): the division by 1000 assumes `ts` is in milliseconds; if the
      // column stores seconds the decay is effectively disabled — confirm the unit.
      const deltaSeconds = (candidate.ts - anchor.ts) / 1000
      const decayWeight = Math.exp(-deltaSeconds / opts.decaySeconds)

      // Position decay: 1st neighbour 1, 2nd 0.8, 3rd 0.6, ... floored so that a
      // large lookAhead can never produce zero or negative weights.
      const positionWeight = Math.max(MIN_CLUSTER_POSITION_WEIGHT, 1 - (position - 1) * 0.2)

      const weight = decayWeight * positionWeight
      const key = clusterPairKey(anchor.senderId, candidate.senderId)

      const pair = pairs.get(key)
      if (pair) {
        pair.rawScore += weight
        pair.coOccurrenceCount += 1
      } else {
        pairs.set(key, {
          lowId: Math.min(anchor.senderId, candidate.senderId),
          highId: Math.max(anchor.senderId, candidate.senderId),
          rawScore: weight,
          coOccurrenceCount: 1,
        })
      }
    }
  }

  // 5. Normalization: divide each raw score by its expected score.
  // expected ≈ (messages of A × messages of B) / total messages × look-ahead factor
  const lookAheadFactor = opts.lookAhead * 0.8 // average number of neighbours covered per message

  const rawEdges: Array<{
    sourceId: number
    targetId: number
    rawScore: number
    expectedScore: number
    normalizedScore: number
    coOccurrenceCount: number
  }> = []

  for (const pair of pairs.values()) {
    const aMsgCount = memberMsgCount.get(pair.lowId) || 0
    const bMsgCount = memberMsgCount.get(pair.highId) || 0

    const expectedScore = ((aMsgCount * bMsgCount) / totalMessages) * lookAheadFactor
    const normalizedScore = expectedScore > 0 ? pair.rawScore / expectedScore : 0

    rawEdges.push({
      sourceId: pair.lowId,
      targetId: pair.highId,
      rawScore: pair.rawScore,
      expectedScore,
      normalizedScore,
      coOccurrenceCount: pair.coOccurrenceCount,
    })
  }

  // Maxima (at least 1) used to scale both scores before blending.
  const maxRawScore = clusterMaxOf(
    rawEdges.map((e) => e.rawScore),
    1
  )
  const maxNormalizedScore = clusterMaxOf(
    rawEdges.map((e) => e.normalizedScore),
    1
  )

  // Hybrid score: 50% scaled raw score + 50% scaled normalized score.
  const edges = rawEdges.map((e) => {
    const hybridScore = 0.5 * (e.rawScore / maxRawScore) + 0.5 * (e.normalizedScore / maxNormalizedScore)

    return {
      ...e,
      rawScore: roundNum(e.rawScore),
      expectedScore: roundNum(e.expectedScore),
      normalizedScore: roundNum(e.normalizedScore),
      hybridScore: roundNum(hybridScore),
    }
  })

  // 6. Sort by hybrid score (descending) and keep the top N edges.
  edges.sort((a, b) => b.hybridScore - a.hybridScore)
  const keptEdges = edges.slice(0, opts.topEdges)

  if (keptEdges.length === 0) return buildEmptyResult(members.length, messages.length)

  // 7. Collect involved members and their degree (sum of hybrid scores of kept edges).
  const involvedIds = new Set<number>()
  const nodeDegree = new Map<number, number>()
  for (const edge of keptEdges) {
    involvedIds.add(edge.sourceId)
    involvedIds.add(edge.targetId)
    nodeDegree.set(edge.sourceId, (nodeDegree.get(edge.sourceId) || 0) + edge.hybridScore)
    nodeDegree.set(edge.targetId, (nodeDegree.get(edge.targetId) || 0) + edge.hybridScore)
  }
  const maxDegree = clusterMaxOf(nodeDegree.values(), 1)

  // 8. Build unique display names. Links refer to nodes by name, so two nodes
  // must never end up with the same one.
  const nameCount = new Map<string, number>()
  for (const id of involvedIds) {
    // `||` on purpose: an empty name also falls back to the id.
    const name = memberInfo.get(id)?.name || String(id)
    nameCount.set(name, (nameCount.get(name) || 0) + 1)
  }

  const displayNames = new Map<number, string>()
  const usedNames = new Set<string>()
  for (const id of involvedIds) {
    const info = memberInfo.get(id)
    const baseName = info?.name || String(id)

    // Shared nicknames get the last 4 characters of the platform id appended.
    let displayName =
      (nameCount.get(baseName) || 0) > 1 ? `${baseName}#${(info?.platformId || String(id)).slice(-4)}` : baseName

    // If that still collides, fall back to the member id, which is unique.
    for (let attempt = 0; usedNames.has(displayName); attempt++) {
      displayName = attempt === 0 ? `${baseName}#${id}` : `${baseName}#${id}-${attempt}`
    }

    usedNames.add(displayName)
    displayNames.set(id, displayName)
  }

  // 9. Build the output.
  const involved = [...involvedIds]
  const maxMsgCount = clusterMaxOf(
    involved.map((id) => memberInfo.get(id)?.messageCount ?? 0),
    1
  )

  const nodes: ClusterGraphNode[] = involved.map((id) => {
    const messageCount = memberInfo.get(id)?.messageCount ?? 0
    const degree = nodeDegree.get(id) || 0
    const normalizedDegree = degree / maxDegree
    const msgNorm = messageCount / maxMsgCount
    // Node size: 70% from degree, 30% from message count.
    const symbolSize = 20 + (0.7 * normalizedDegree + 0.3 * msgNorm) * 35

    return {
      id,
      name: displayNames.get(id) ?? String(id),
      messageCount,
      symbolSize: Math.round(symbolSize),
      degree: roundNum(degree),
      normalizedDegree: roundNum(normalizedDegree),
    }
  })

  nodes.sort((a, b) => b.degree - a.degree)

  const maxLinkValue = clusterMaxOf(
    keptEdges.map((e) => e.hybridScore),
    0
  )

  const links: ClusterGraphLink[] = keptEdges.map((e) => ({
    source: displayNames.get(e.sourceId) ?? String(e.sourceId),
    target: displayNames.get(e.targetId) ?? String(e.targetId),
    value: e.hybridScore, // the hybrid score is the primary output
    rawScore: e.rawScore,
    expectedScore: e.expectedScore,
    coOccurrenceCount: e.coOccurrenceCount,
  }))

  return {
    nodes,
    links,
    maxLinkValue: roundNum(maxLinkValue),
    communities: [], // kept for compatibility; no longer computed
    stats: {
      totalMembers: members.length,
      totalMessages: messages.length,
      involvedMembers: involvedIds.size,
      edgeCount: keptEdges.length,
      communityCount: 0,
    },
  }
}
|
||||
|
||||
Reference in New Issue
Block a user