Files
cultivation-world-simulator/tools/i18n/check_po_duplicates.py
2026-02-06 00:43:08 +08:00

152 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""检查 po 文件中是否有重复的 msgid"""
import re
import sys
from pathlib import Path
from collections import Counter
def extract_msgids(filepath: Path) -> list[str]:
    """Extract every non-empty msgid from a gettext .po file.

    The file is scanned line by line. A msgid starts on a line beginning with
    ``msgid "..."`` and may continue on following lines that consist solely of
    a quoted string; the pieces are concatenated. Escape sequences are kept
    exactly as written in the file (``\\"`` stays ``\\"``), so simple
    single-line entries are returned unchanged.

    Lines that do not start with ``msgid`` are never treated as entries, which
    means obsolete ``#~ msgid`` lines and ``msgid_plural`` lines are ignored.
    ``msgctxt`` is not taken into account.

    Args:
        filepath: Path of the .po file to read (decoded as UTF-8).

    Returns:
        The msgids in file order, excluding empty ones (such as the header
        entry ``msgid ""``).
    """
    # One quoted PO string. Backslash escapes are consumed as a pair so that
    # an escaped quote (\") does not terminate the match early.
    quoted = r'"((?:[^"\\]|\\.)*)"'
    msgid_start = re.compile(r'msgid\s+' + quoted)
    continuation = re.compile(quoted)

    entries = []  # one list of raw string pieces per msgid
    collecting = False  # True while the previous line belonged to a msgid
    with open(filepath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            start = msgid_start.match(line)
            if start:
                entries.append([start.group(1)])
                collecting = True
            elif collecting and (more := continuation.fullmatch(line)):
                # Bare quoted line directly after a msgid: multi-line msgid.
                entries[-1].append(more.group(1))
            else:
                # Anything else (msgstr, comment, blank line) ends the msgid.
                collecting = False

    joined = ("".join(pieces) for pieces in entries)
    # Drop empty ids, i.e. the header entry.
    return [msgid for msgid in joined if msgid]
def find_duplicates(msgids: list[str]) -> dict[str, int]:
    """Report which msgids occur more than once.

    Args:
        msgids: msgid strings, possibly containing repeats.

    Returns:
        A dict mapping each msgid that appears at least twice to the number
        of times it appears. Empty when there are no repeats.
    """
    repeated = {}
    for text, occurrences in Counter(msgids).items():
        if occurrences <= 1:
            continue
        repeated[text] = occurrences
    return repeated
def check_file(filepath: Path, lang_name: str) -> tuple[int, dict[str, int]]:
    """Check one .po file for duplicate msgids and print a report.

    Args:
        filepath: Path of the .po file.
        lang_name: Language label used in the printed banner.

    Returns:
        A ``(msgid_count, duplicates)`` tuple, where ``duplicates`` maps each
        repeated msgid to its occurrence count. ``(0, {})`` is returned when
        the file does not exist.
    """
    # Banner identifying the file under inspection.
    print(f"\n{'='*60}")
    print(f"检查文件: {lang_name}")
    print(f"路径: {filepath}")
    print(f"{'='*60}")

    if filepath.exists():
        found = extract_msgids(filepath)
        total = len(found)
        print(f"总共找到 {total} 个 msgid 条目")

        repeated = find_duplicates(found)
        if not repeated:
            print(f"\n[OK] 未发现重复的 msgid")
        else:
            print(f"\n[WARNING] 发现 {len(repeated)} 个重复的 msgid:")
            # Sorted so the report order is stable between runs.
            for text, times in sorted(repeated.items()):
                print(f" - '{text}' 出现了 {times}")
        return total, repeated

    # Missing file: report it and hand back an empty result.
    print(f"[ERROR] 文件不存在")
    return 0, {}
def _report_exclusive(lang_label: str, exclusive: set[str], limit: int = 5) -> bool:
    """Print the msgids that exist in only one language file.

    At most ``limit`` msgids are listed (in sorted order), followed by a line
    with the number of remaining ones.

    Returns:
        True if ``exclusive`` was non-empty (something was reported).
    """
    if not exclusive:
        return False
    print(f"\n[WARNING] 只在{lang_label}中存在的 msgid ({len(exclusive)} 个):")
    for msgid in sorted(exclusive)[:limit]:
        print(f" - '{msgid}'")
    if len(exclusive) > limit:
        print(f" ... 还有 {len(exclusive) - limit}")
    return True


def main() -> int:
    """Check the zh-CN and en-US message catalogs and print a summary.

    Both files are checked for duplicate msgids, then their msgid counts and
    msgid sets are compared against each other.

    Returns:
        1 if a file is missing, any duplicate exists, the counts differ, or
        the msgid sets differ; otherwise 0.
    """
    # The script lives two directories below the project root.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent

    # Locations of the two message catalogs.
    zh_file = project_root / "static" / "locales" / "zh-CN" / "LC_MESSAGES" / "messages.po"
    en_file = project_root / "static" / "locales" / "en-US" / "LC_MESSAGES" / "messages.po"

    zh_count, zh_dups = check_file(zh_file, "中文 (zh_CN)")
    en_count, en_dups = check_file(en_file, "英文 (en_US)")

    # Summary section.
    print(f"\n{'='*60}")
    print("检查总结")
    print(f"{'='*60}")

    has_error = False

    # A missing file yields count 0 and no duplicates, which would otherwise
    # look like a clean result; make it fail the run explicitly.
    for path in (zh_file, en_file):
        if not path.exists():
            print(f"[ERROR] 文件不存在: {path}")
            has_error = True

    if zh_dups or en_dups:
        print("[ERROR] 发现重复条目,需要修复")
        has_error = True
    else:
        print("[OK] 两个文件都没有重复的 msgid")

    if zh_count != en_count:
        print(f"[WARNING] 中英文 msgid 数量不一致: 中文 {zh_count} 个, 英文 {en_count}")
        has_error = True
    else:
        print(f"[OK] 中英文 msgid 数量一致: {zh_count}")

    # Compare the actual keys only when both files yielded entries.
    if zh_count > 0 and en_count > 0:
        zh_msgids = set(extract_msgids(zh_file))
        en_msgids = set(extract_msgids(en_file))

        zh_only = zh_msgids - en_msgids
        en_only = en_msgids - zh_msgids

        if _report_exclusive("中文", zh_only):
            has_error = True
        if _report_exclusive("英文", en_only):
            has_error = True

        if not zh_only and not en_only:
            print("[OK] 中英文 msgid 键完全匹配")

    # Exit status for the caller.
    return 1 if has_error else 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)