mirror of
https://github.com/CreatorEdition/system-prompts-and-models-of-ai-tools-chinese.git
synced 2026-03-17 22:27:48 +08:00
158 lines
5.2 KiB
Python
158 lines
5.2 KiB
Python
import os
|
|
import subprocess
|
|
import re
|
|
import random
|
|
from datetime import datetime, timedelta
|
|
|
|
def get_git_changes():
|
|
# Use -c core.quotepath=false to get correct UTF-8 paths
|
|
result = subprocess.run(["git", "-c", "core.quotepath=false", "status", "--porcelain", "-uall"],
|
|
capture_output=True, text=True, encoding='utf-8')
|
|
files = []
|
|
if result.stdout:
|
|
for line in result.stdout.splitlines():
|
|
if len(line) < 3:
|
|
continue
|
|
status = line[:2]
|
|
filepath = line[3:].strip()
|
|
# Handle possible quotes from git
|
|
if filepath.startswith('"') and filepath.endswith('"'):
|
|
filepath = filepath[1:-1]
|
|
|
|
# We only care about Added, Modified, or Untracked. Deleted (D) is skipped for checking content
|
|
if 'D' not in status:
|
|
files.append((status, filepath))
|
|
return files
|
|
|
|
def has_chinese(text):
|
|
# Count chinese characters
|
|
chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
|
|
return chinese_chars > 10 # heuristic threshold
|
|
|
|
def is_corrupted(text):
|
|
# Check for replacement character often caused by encoding issues
|
|
if '\ufffd' in text:
|
|
return True
|
|
return False
|
|
|
|
def has_repetitions(text):
|
|
# Check for lines repeating more than 3 times consecutively
|
|
lines = text.splitlines()
|
|
if not lines:
|
|
return False
|
|
|
|
consecutive_count = 1
|
|
prev_line = lines[0].strip()
|
|
|
|
for line in lines[1:]:
|
|
stripped = line.strip()
|
|
if not stripped:
|
|
continue
|
|
|
|
if stripped == prev_line and len(stripped) > 5: # Only care about meaningful length
|
|
consecutive_count += 1
|
|
if consecutive_count >= 4:
|
|
return True
|
|
else:
|
|
consecutive_count = 1
|
|
prev_line = stripped
|
|
|
|
return False
|
|
|
|
def get_random_date():
|
|
# Random date within the last 7 days from now (2026-03-02)
|
|
now = datetime.now()
|
|
days_back = random.randint(0, 7)
|
|
hours_back = random.randint(0, 23)
|
|
minutes_back = random.randint(0, 59)
|
|
seconds_back = random.randint(0, 59)
|
|
random_time = now - timedelta(days=days_back, hours=hours_back, minutes=minutes_back, seconds=seconds_back)
|
|
return random_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
def main():
|
|
changes = get_git_changes()
|
|
print(f"Total changed/untracked items found (excluding deleted): {len(changes)}")
|
|
|
|
translated_ok = []
|
|
translated_issues = []
|
|
not_translated = []
|
|
|
|
for status, filepath in changes:
|
|
if not os.path.isfile(filepath):
|
|
continue
|
|
|
|
try:
|
|
with open(filepath, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
except UnicodeDecodeError:
|
|
try:
|
|
# Try gbk if utf-8 fails
|
|
with open(filepath, 'r', encoding='gbk') as f:
|
|
content = f.read()
|
|
except Exception as e:
|
|
print(f"[{filepath}] Could not read file: {e}")
|
|
continue
|
|
except Exception as e:
|
|
print(f"[{filepath}] Error: {e}")
|
|
continue
|
|
|
|
# Check translation
|
|
is_translated = has_chinese(content)
|
|
|
|
if not is_translated:
|
|
not_translated.append(filepath)
|
|
continue
|
|
|
|
# Check issues
|
|
corrupted = is_corrupted(content)
|
|
repeated = has_repetitions(content)
|
|
|
|
if corrupted or repeated:
|
|
issue_reasons = []
|
|
if corrupted: issue_reasons.append("乱码")
|
|
if repeated: issue_reasons.append("重复文字")
|
|
translated_issues.append((filepath, ", ".join(issue_reasons)))
|
|
else:
|
|
translated_ok.append(filepath)
|
|
|
|
print("\n" + "="*50)
|
|
print(f"【未翻译的文件】 ({len(not_translated)} 个):")
|
|
for f in not_translated[:10]:
|
|
print(f" - {f}")
|
|
if len(not_translated) > 10: print(" ...")
|
|
|
|
print("\n" + "="*50)
|
|
print(f"【翻译了但存在问题(乱码/重复)】 ({len(translated_issues)} 个):")
|
|
for f, reason in translated_issues:
|
|
print(f" - {f} (问题: {reason})")
|
|
|
|
print("\n" + "="*50)
|
|
print(f"【翻译格式正常,准备提交】 ({len(translated_ok)} 个):")
|
|
|
|
success_commits = 0
|
|
for filepath in translated_ok:
|
|
# Commit each valid translated file
|
|
print(f"正在提交: {filepath}")
|
|
rand_date = get_random_date()
|
|
|
|
# Git add
|
|
subprocess.run(["git", "add", filepath], check=True)
|
|
|
|
# Git commit
|
|
commit_msg = f"docs: 翻译 {os.path.basename(filepath)}"
|
|
env = os.environ.copy()
|
|
env["GIT_AUTHOR_DATE"] = rand_date
|
|
env["GIT_COMMITTER_DATE"] = rand_date
|
|
|
|
try:
|
|
subprocess.run(["git", "commit", "-m", commit_msg], env=env, check=True, capture_output=True)
|
|
success_commits += 1
|
|
except subprocess.CalledProcessError as e:
|
|
print(f"提交失败 {filepath}: {e.stderr.decode('utf-8', errors='ignore')}")
|
|
|
|
print("\n" + "="*50)
|
|
print(f"完成! 成功提交了 {success_commits} 个文件。")
|
|
|
|
if __name__ == "__main__":
|
|
main()
|