release: opensource snapshot 2026-02-27 19:25:00

2026-02-27 19:25:00 +08:00
commit 5de9622c8b
1055 changed files with 164772 additions and 0 deletions
--- a/extract_chinese.py
+++ b/extract_chinese.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+提取React/TypeScript代码中的硬编码中文字符串
+"""
+import re
+import os
+from pathlib import Path
+import json
+
+def extract_chinese_strings(file_path):
+    """Extract hard-coded Chinese strings from one React/TypeScript file.
+
+    Args:
+        file_path: Path of the file to scan; it is read as UTF-8.
+
+    Returns:
+        List of ``{'text', 'line', 'category'}`` dicts, unique per (text,
+        line): ``line`` is the 1-based line where the stripped text starts,
+        ``category`` is always ``'unknown'``.  ``[]`` if the file is unreadable.
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except (OSError, UnicodeDecodeError):
+        # Best effort: skip unreadable files, but let Ctrl-C and bugs propagate.
+        return []
+
+    patterns = [
+        # 1. Quoted literal inside a JSX expression: {'text'} or {"text"}
+        r'\{\s*[\'"]([^\'"\{\}]*[\u4e00-\u9fff]+[^\'"\{\}]*)[\'\"]\s*\}',
+        # 2. JSX text node: >text<
+        r'\>([^<\>]*[\u4e00-\u9fff]+[^<\>]*)\<',
+        # 3. Well-known attributes, e.g. placeholder="text"
+        r'(?:placeholder|title|alt|value|defaultValue|confirmText|cancelText|message)\s*=\s*[\'"]([^\'\"]*[\u4e00-\u9fff]+[^\'\"]*)[\'"]',
+        # 4. Any quoted value after "=", e.g. a default value = 'text'
+        r'=\s*[\'"]([^\'\"]*[\u4e00-\u9fff]+[^\'\"]*)[\'"]',
+    ]
+
+    found = {}  # (text, line) -> entry; dict order keeps first-seen order
+    for pattern in patterns:
+        for match in re.finditer(pattern, content):
+            raw = match.group(1)
+            # Anchor on the text itself rather than the match start: a JSX
+            # text node may begin on a later line than its opening ">".
+            text_start = match.start(1) + len(raw) - len(raw.lstrip())
+            line_start = content.rfind('\n', 0, text_start) + 1
+            # Skip text preceded by a "//" comment marker on the same line.
+            if content.find('//', line_start, text_start) != -1:
+                continue
+            text = raw.strip()
+            line_no = content.count('\n', 0, text_start) + 1
+            entry = {'text': text, 'line': line_no, 'category': 'unknown'}
+            found.setdefault((text, line_no), entry)
+
+    return list(found.values())
+
+def scan_directory(base_path, exclude_patterns=('test-ui',)):
+    """Map each .ts/.tsx file under ``base_path`` to its Chinese-string findings.
+
+    Directories named in ``exclude_patterns`` (any container of names) and
+    hidden, dot-prefixed directories are not descended into.  Keys are paths
+    relative to ``base_path``; files without findings are omitted.
+    """
+    all_findings = {}
+    for root, dirs, files in os.walk(base_path):
+        # Prune in place so os.walk does not descend into skipped directories.
+        dirs[:] = [d for d in dirs if d not in exclude_patterns and not d.startswith('.')]
+        for file in files:
+            if file.endswith(('.tsx', '.ts')):
+                file_path = os.path.join(root, file)
+                findings = extract_chinese_strings(file_path)
+                if findings:
+                    all_findings[os.path.relpath(file_path, base_path)] = findings
+    return all_findings
+
+if __name__ == '__main__':
+    # Entry point: scan ./src and print a per-file summary of the findings.
+    preview_limit = 10  # list at most this many strings per file
+    results = scan_directory('src')
+    for file_path in sorted(results):
+        findings = results[file_path]
+        if not findings:
+            continue
+        count = len(findings)
+        print(f"\n## {file_path} ({count} strings)")
+        for finding in findings[:preview_limit]:
+            print(f"  Line {finding['line']}: {finding['text'][:60]}")
+        if count > preview_limit:
+            print(f"  ... and {count - preview_limit} more")
+    total = sum(len(found) for found in results.values())
+    print(f"\n\n总计: {len(results)} 个文件, {total} 处硬编码中文")