feat: 互动频率分析

2026-06-13 19:21:46 +08:00 · 2026-02-10 23:36:03 +08:00
parent 2d6c4d085a
commit 448f28da14
12 changed files with 1151 additions and 3 deletions
@@ -12,6 +12,14 @@ export { getNightOwlAnalysis, getDragonKingAnalysis, getDivingAnalysis, getCheck
 // 行为分析：斗图
 export { getMemeBattleAnalysis } from './behavior'

-// 社交分析：@ 互动、含笑量
-export { getMentionAnalysis, getMentionGraph, getLaughAnalysis } from './social'
-export type { MentionGraphData, MentionGraphNode, MentionGraphLink } from './social'
+// 社交分析：@ 互动、含笑量、小团体
+export { getMentionAnalysis, getMentionGraph, getLaughAnalysis, getClusterGraph } from './social'
+export type {
+  MentionGraphData,
+  MentionGraphNode,
+  MentionGraphLink,
+  ClusterGraphData,
+  ClusterGraphNode,
+  ClusterGraphLink,
+  ClusterGraphOptions,
+} from './social'
@@ -670,3 +670,343 @@ export function getLaughAnalysis(sessionId: string, filter?: TimeFilter, keyword
    groupLaughRate: Math.round((totalLaughs / totalMessages) * 10000) / 100,
  }
 }
+
+// ==================== Small-group (clique) relationship graph: temporal-adjacency co-occurrence ====================
+
+/**
+ * Tuning options for the small-group (clique) relationship graph built by getClusterGraph.
+ */
+export interface ClusterGraphOptions {
+  /** How many distinct subsequent speakers to look ahead at after each message (default 3). */
+  lookAhead?: number
+  /** Time-decay constant in seconds used in exp(-delta / decaySeconds) (default 120). */
+  decaySeconds?: number
+  /** Maximum number of edges to keep in the result (default 100). */
+  topEdges?: number
+}
+
+/**
+ * A node (one member) of the small-group graph.
+ */
+export interface ClusterGraphNode {
+  id: number // member id (the sender_id seen on the member's messages)
+  name: string // display name; duplicated names get a "#" suffix to tell them apart
+  messageCount: number // message count taken from the member query
+  symbolSize: number // rounded drawing size derived from degree and message count
+  degree: number // sum of the hybrid scores of the kept edges touching this node
+  normalizedDegree: number // degree divided by the largest degree (that maximum is floored at 1)
+}
+
+/**
+ * An undirected edge between two members of the small-group graph.
+ */
+export interface ClusterGraphLink {
+  source: string // display name of one endpoint
+  target: string // display name of the other endpoint
+  value: number // hybrid score (the main edge weight)
+  rawScore: number // accumulated time- and position-weighted adjacency score
+  expectedScore: number // score expected from the two members' message counts alone
+  coOccurrenceCount: number // number of times the pair was counted as adjacent
+}
+
+/**
+ * Result of getClusterGraph: nodes, edges and summary statistics.
+ */
+export interface ClusterGraphData {
+  nodes: ClusterGraphNode[]
+  links: ClusterGraphLink[]
+  maxLinkValue: number // largest `value` among `links` (0 when there are none)
+  communities: Array<{ id: number; name: string; size: number }> // getClusterGraph always returns [] here
+  stats: {
+    totalMembers: number // members returned by the member query
+    totalMessages: number // messages returned by the message query
+    involvedMembers: number // members that appear in at least one kept edge
+    edgeCount: number // number of kept edges
+    communityCount: number // getClusterGraph always returns 0 here
+  }
+}
+
+const DEFAULT_CLUSTER_OPTIONS = { // defaults that getClusterGraph merges under the caller's ClusterGraphOptions
+  lookAhead: 3, // distinct subsequent speakers examined per message
+  decaySeconds: 120, // time-decay constant, in seconds
+  topEdges: 100, // maximum number of edges kept
+}
+
+function roundNum(value: number, digits = 4): number { // round `value` to `digits` decimal places (4 by default)
+  const factor = 10 ** digits // power of ten that moves the kept decimals left of the decimal point
+  return Math.round(value * factor) / factor
+}
+
+function clusterPairKey(aId: number, bId: number): string { // order-independent "a-b" key for a pair of member ids
+  return aId < bId ? `${aId}-${bId}` : `${bId}-${aId}` // smaller id first, so (a, b) and (b, a) share one key
+}
+
+/**
+ * Builds the small-group (clique) relationship graph from temporally adjacent messages.
+ *
+ * How it works:
+ * 1. Adjacency: after an anchor message, the next `lookAhead` distinct other senders count as its neighbors.
+ * 2. Weighting: each neighbor adds exp(-delta / decaySeconds) times a position weight (1, 0.8, 0.6, ...).
+ * 3. Normalization: raw score / expected score, blended 50/50 with the max-scaled raw score ("hybrid score").
+ * 4. Output: the top `topEdges` edges by hybrid score; `communities` is always empty (no community detection).
+ */
+export function getClusterGraph(
+  sessionId: string,
+  filter?: TimeFilter,
+  options?: ClusterGraphOptions
+): ClusterGraphData {
+  const db = openDatabase(sessionId)
+  const opts = { ...DEFAULT_CLUSTER_OPTIONS, ...options } // caller-supplied options override the defaults
+
+  const emptyResult: ClusterGraphData = {
+    nodes: [],
+    links: [],
+    maxLinkValue: 0,
+    communities: [],
+    stats: {
+      totalMembers: 0,
+      totalMessages: 0,
+      involvedMembers: 0,
+      edgeCount: 0,
+      communityCount: 0,
+    },
+  }
+
+  if (!db) return emptyResult // openDatabase returned nothing usable for this session
+
+  // 1. Load every member except the '系统消息' account; NOTE(review): messageCount here ignores `filter`.
+  const members = db
+    .prepare(
+      `
+      SELECT 
+        id, 
+        platform_id as platformId, 
+        COALESCE(group_nickname, account_name, platform_id) as name,
+        (SELECT COUNT(*) FROM message WHERE sender_id = member.id) as messageCount
+      FROM member
+      WHERE COALESCE(account_name, '') != '系统消息'
+    `
+    )
+    .all() as Array<{ id: number; platformId: string; name: string; messageCount: number }>
+
+  if (members.length < 2) return { ...emptyResult, stats: { ...emptyResult.stats, totalMembers: members.length } }
+
+  const memberInfo = new Map<number, { name: string; platformId: string; messageCount: number }>()
+  for (const m of members) {
+    memberInfo.set(m.id, { name: m.name, platformId: m.platformId, messageCount: m.messageCount })
+  }
+
+  // 2. Load messages ordered by ts then id, using the clause from buildTimeFilter plus the '系统消息' exclusion.
+  const { clause, params } = buildTimeFilter(filter)
+  let whereClause = clause
+  if (whereClause.includes('WHERE')) { // extend an existing WHERE clause, otherwise start a new one
+    whereClause += " AND COALESCE(m.account_name, '') != '系统消息'"
+  } else {
+    whereClause = " WHERE COALESCE(m.account_name, '') != '系统消息'"
+  }
+
+  const messages = db
+    .prepare(
+      `
+      SELECT msg.sender_id as senderId, msg.ts as ts
+      FROM message msg
+      JOIN member m ON msg.sender_id = m.id
+      ${whereClause}
+      ORDER BY msg.ts ASC, msg.id ASC
+    `
+    )
+    .all(...params) as Array<{ senderId: number; ts: number }>
+
+  if (messages.length < 2) {
+    return { ...emptyResult, stats: { ...emptyResult.stats, totalMembers: members.length, totalMessages: messages.length } }
+  }
+
+  // 3. Count messages per sender within the loaded (filtered) messages; used for normalization.
+  const memberMsgCount = new Map<number, number>()
+  for (const msg of messages) {
+    memberMsgCount.set(msg.senderId, (memberMsgCount.get(msg.senderId) || 0) + 1)
+  }
+
+  const totalMessages = messages.length
+
+  // 4. Accumulate the raw adjacency score and the co-occurrence count for every member pair.
+  const pairRawScore = new Map<string, number>()
+  const pairCoOccurrence = new Map<string, number>()
+
+  for (let i = 0; i < messages.length - 1; i++) {
+    const anchor = messages[i]
+    const seenPartners = new Set<number>()
+    let partnersFound = 0
+
+    // Look ahead for `lookAhead` distinct other senders; NOTE(review): no distance/time cap, may scan to the end.
+    for (let j = i + 1; j < messages.length && partnersFound < opts.lookAhead; j++) {
+      const candidate = messages[j]
+
+      // Skip messages from the anchor's own sender.
+      if (candidate.senderId === anchor.senderId) continue
+      // Skip senders already counted for this anchor.
+      if (seenPartners.has(candidate.senderId)) continue
+
+      seenPartners.add(candidate.senderId)
+      partnersFound++
+
+      // Time-decay weight; NOTE(review): dividing by 1000 assumes `ts` is in milliseconds — confirm.
+      const deltaSeconds = (candidate.ts - anchor.ts) / 1000
+      const decayWeight = Math.exp(-deltaSeconds / opts.decaySeconds)
+      // Position decay: 1st neighbor 1, 2nd 0.8, 3rd 0.6; NOTE(review): hits 0 at the 6th and is negative after it.
+      const positionWeight = 1 - (partnersFound - 1) * 0.2
+
+      const weight = decayWeight * positionWeight
+      const key = clusterPairKey(anchor.senderId, candidate.senderId)
+
+      pairRawScore.set(key, (pairRawScore.get(key) || 0) + weight)
+      pairCoOccurrence.set(key, (pairCoOccurrence.get(key) || 0) + 1)
+    }
+  }
+
+  // 5. Normalize: compute an expected score per pair and divide the raw score by it.
+  // Expected formula: expected = (A messages / total) × (B messages / total) × total × average window coverage
+  // Simplified: expected ≈ (A messages × B messages) / total × lookAhead factor
+  const lookAheadFactor = opts.lookAhead * 0.8 // approximate number of neighbors each message covers on average
+
+  // Collect every edge together with its scores.
+  const rawEdges: Array<{
+    sourceId: number
+    targetId: number
+    rawScore: number
+    expectedScore: number
+    normalizedScore: number
+    coOccurrenceCount: number
+  }> = []
+
+  for (const [key, rawScore] of pairRawScore) {
+    const [aIdStr, bIdStr] = key.split('-')
+    const aId = parseInt(aIdStr)
+    const bId = parseInt(bIdStr)
+
+    const aMsgCount = memberMsgCount.get(aId) || 0
+    const bMsgCount = memberMsgCount.get(bId) || 0
+
+    // Expected score; it divides the raw score below and is also returned on the link for reference.
+    const expectedScore = ((aMsgCount * bMsgCount) / totalMessages) * lookAheadFactor
+    const normalizedScore = expectedScore > 0 ? rawScore / expectedScore : 0
+
+    rawEdges.push({
+      sourceId: aId,
+      targetId: bId,
+      rawScore,
+      expectedScore,
+      normalizedScore,
+      coOccurrenceCount: pairCoOccurrence.get(key) || 0,
+    })
+  }
+
+  // Maxima used to scale both scores into [0, 1]; each maximum is floored at 1.
+  const maxRawScore = Math.max(...rawEdges.map((e) => e.rawScore), 1)
+  const maxNormalizedScore = Math.max(...rawEdges.map((e) => e.normalizedScore), 1)
+
+  // Hybrid score: 50% max-scaled raw score + 50% max-scaled normalized score.
+  const edges = rawEdges.map((e) => {
+    const hybridScore = 0.5 * (e.rawScore / maxRawScore) + 0.5 * (e.normalizedScore / maxNormalizedScore)
+
+    return {
+      ...e,
+      rawScore: roundNum(e.rawScore),
+      expectedScore: roundNum(e.expectedScore),
+      normalizedScore: roundNum(e.normalizedScore),
+      hybridScore: roundNum(hybridScore),
+    }
+  })
+
+  // 6. Sort by hybrid score (descending) and keep the top N edges.
+  edges.sort((a, b) => b.hybridScore - a.hybridScore)
+  const keptEdges = edges.slice(0, opts.topEdges)
+
+  if (keptEdges.length === 0) {
+    return {
+      ...emptyResult,
+      stats: { ...emptyResult.stats, totalMembers: members.length, totalMessages: messages.length },
+    }
+  }
+
+  // 7. Collect the members that appear in at least one kept edge.
+  const involvedIds = new Set<number>()
+  for (const edge of keptEdges) {
+    involvedIds.add(edge.sourceId)
+    involvedIds.add(edge.targetId)
+  }
+
+  // 8. Node degree = sum of the hybrid scores of the node's kept edges.
+  const nodeDegree = new Map<number, number>()
+  for (const edge of keptEdges) {
+    nodeDegree.set(edge.sourceId, (nodeDegree.get(edge.sourceId) || 0) + edge.hybridScore)
+    nodeDegree.set(edge.targetId, (nodeDegree.get(edge.targetId) || 0) + edge.hybridScore)
+  }
+  const maxDegree = Math.max(...nodeDegree.values(), 1)
+
+  // 9. Build display names; a duplicated name gets "#" plus the last 4 characters of platformId (or of the id).
+  const nameCount = new Map<string, number>()
+  for (const id of involvedIds) {
+    const name = memberInfo.get(id)?.name || String(id)
+    nameCount.set(name, (nameCount.get(name) || 0) + 1)
+  }
+
+  const displayNames = new Map<number, string>()
+  for (const id of involvedIds) {
+    const info = memberInfo.get(id)
+    const baseName = info?.name || String(id)
+    if ((nameCount.get(baseName) || 0) > 1) {
+      displayNames.set(id, `${baseName}#${(info?.platformId || String(id)).slice(-4)}`)
+    } else {
+      displayNames.set(id, baseName)
+    }
+  }
+
+  // 10. Assemble the output.
+  const maxMsgCount = Math.max(...[...involvedIds].map((id) => memberInfo.get(id)?.messageCount || 0), 1)
+
+  const nodes: ClusterGraphNode[] = [...involvedIds].map((id) => {
+    const info = memberInfo.get(id)!
+    const degree = nodeDegree.get(id) || 0
+    const normalizedDegree = degree / maxDegree
+    const msgNorm = info.messageCount / maxMsgCount
+    // Node size: 70% from degree, 30% from message count, mapped onto the range 20–55.
+    const symbolSize = 20 + (0.7 * normalizedDegree + 0.3 * msgNorm) * 35
+
+    return {
+      id,
+      name: displayNames.get(id)!,
+      messageCount: info.messageCount,
+      symbolSize: Math.round(symbolSize),
+      degree: roundNum(degree),
+      normalizedDegree: roundNum(normalizedDegree),
+    }
+  })
+
+  nodes.sort((a, b) => b.degree - a.degree)
+
+  const maxLinkValue = keptEdges.length > 0 ? Math.max(...keptEdges.map((e) => e.hybridScore)) : 0
+
+  const links: ClusterGraphLink[] = keptEdges.map((e) => ({
+    source: displayNames.get(e.sourceId)!,
+    target: displayNames.get(e.targetId)!,
+    value: e.hybridScore, // the hybrid score is the primary edge weight
+    rawScore: e.rawScore,
+    expectedScore: e.expectedScore,
+    coOccurrenceCount: e.coOccurrenceCount,
+  }))
+
+  return {
+    nodes,
+    links,
+    maxLinkValue: roundNum(maxLinkValue),
+    communities: [], // field kept for compatibility; communities are no longer computed
+    stats: {
+      totalMembers: members.length,
+      totalMessages: messages.length,
+      involvedMembers: involvedIds.size,
+      edgeCount: keptEdges.length,
+      communityCount: 0,
+    },
+  }
+}