release: opensource snapshot 2026-02-27 19:25:00
This commit is contained in:
91
extract_chinese.py
Normal file
91
extract_chinese.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
提取React/TypeScript代码中的硬编码中文字符串
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
def extract_chinese_strings(file_path):
|
||||
"""提取文件中的中文字符串"""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
except:
|
||||
return []
|
||||
|
||||
results = []
|
||||
|
||||
# 匹配JSX/TSX中的中文字符串
|
||||
# 1. {' 中文 '} 或 {"中文"}
|
||||
pattern1 = r'\{\s*[\'"]([^\'"\{\}]*[\u4e00-\u9fff]+[^\'"\{\}]*)[\'\"]\s*\}'
|
||||
# 2. >中文<
|
||||
pattern2 = r'\>([^<\>]*[\u4e00-\u9fff]+[^<\>]*)\<'
|
||||
# 3. placeholder="中文" 等属性
|
||||
pattern3 = r'(?:placeholder|title|alt|value|defaultValue|confirmText|cancelText|message)\s*=\s*[\'"]([^\'\"]*[\u4e00-\u9fff]+[^\'\"]*)[\'"]'
|
||||
# 4. 字符串默认值 = '中文'
|
||||
pattern4 = r'=\s*[\'"]([^\'\"]*[\u4e00-\u9fff]+[^\'\"]*)[\'"]'
|
||||
|
||||
for pattern in [pattern1, pattern2, pattern3, pattern4]:
|
||||
matches = re.finditer(pattern, content)
|
||||
for match in matches:
|
||||
chinese_text = match.group(1).strip()
|
||||
if chinese_text and len(chinese_text) > 0:
|
||||
# 跳过注释
|
||||
line_num = content[:match.start()].count('\n') + 1
|
||||
line = content.split('\n')[line_num - 1]
|
||||
if '//' in line and line.index('//') < line.find(chinese_text):
|
||||
continue
|
||||
results.append({
|
||||
'text': chinese_text,
|
||||
'line': line_num,
|
||||
'category': 'unknown'
|
||||
})
|
||||
|
||||
# 去重
|
||||
seen = set()
|
||||
unique_results = []
|
||||
for r in results:
|
||||
key = f"{r['text']}_{r['line']}"
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
unique_results.append(r)
|
||||
|
||||
return unique_results
|
||||
|
||||
def scan_directory(base_path,exclude_patterns=['test-ui']):
|
||||
"""扫描目录中的所有TSX/TS文件"""
|
||||
all_findings = {}
|
||||
|
||||
for root, dirs, files in os.walk(base_path):
|
||||
# 排除特定目录
|
||||
dirs[:] = [d for d in dirs if d not in exclude_patterns and not d.startswith('.')]
|
||||
|
||||
for file in files:
|
||||
if file.endswith(('.tsx', '.ts')):
|
||||
file_path = os.path.join(root, file)
|
||||
relative_path = os.path.relpath(file_path, base_path)
|
||||
|
||||
findings = extract_chinese_strings(file_path)
|
||||
if findings:
|
||||
all_findings[relative_path] = findings
|
||||
|
||||
return all_findings
|
||||
|
||||
if __name__ == '__main__':
|
||||
base_dir = 'src'
|
||||
results = scan_directory(base_dir)
|
||||
|
||||
# 输出结果
|
||||
total = 0
|
||||
for file_path, findings in sorted(results.items()):
|
||||
if findings:
|
||||
print(f"\n## {file_path} ({len(findings)} strings)")
|
||||
for finding in findings[:10]: # 只显示前10个
|
||||
print(f" Line {finding['line']}: {finding['text'][:60]}")
|
||||
total += len(findings)
|
||||
if len(findings) > 10:
|
||||
print(f" ... and {len(findings) - 10} more")
|
||||
|
||||
print(f"\n\n总计: {len(results)} 个文件, {total} 处硬编码中文")
|
||||
Reference in New Issue
Block a user