keep edit

2025-10-17 14:52:29 +08:00
parent 22185d799e
commit 517365879a
622 changed files with 40518 additions and 7298 deletions
--- a/remove_comments.py
+++ b/remove_comments.py
@@ -0,0 +1,451 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+安全移除代码注释工具
+支持 Objective-C, Swift, C/C++ 等语言
+"""
+
+import re
+import os
+import sys
+import argparse
+from pathlib import Path
+from typing import List, Tuple, Optional
+import shutil
+
+class CommentRemover:
+    """
+    Remove comments from Objective-C, Swift and C/C++ source text.
+
+    Comments that match a keep rule (copyright, important keyword, pragma/MARK)
+    are left in place; each rule can be switched off in the constructor.
+    """
+
+    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
+        """
+        Initialize the comment remover.
+
+        Args:
+            keep_copyright: keep comments that contain a copyright keyword.
+            keep_important: keep comments that contain an important keyword (TODO, FIXME, ...).
+            keep_pragma: keep comments that contain ``#pragma``, ``MARK:`` or ``MARK -``.
+        """
+        self.keep_copyright = keep_copyright
+        self.keep_important = keep_important
+        self.keep_pragma = keep_pragma
+
+        # Important keywords, matched as case-sensitive substrings.
+        self.important_keywords = [
+            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
+            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
+        ]
+
+        # Copyright-related keywords, matched as case-sensitive substrings.
+        self.copyright_keywords = [
+            'Copyright', 'copyright', '©', 'LICENSE',
+            'All rights reserved', 'Created by'
+        ]
+
+    def is_copyright_comment(self, comment: str) -> bool:
+        """Return True if copyright comments are kept and this one has a copyright keyword."""
+        if not self.keep_copyright:
+            return False
+        return any(keyword in comment for keyword in self.copyright_keywords)
+
+    def is_important_comment(self, comment: str) -> bool:
+        """Return True if important comments are kept and this one has an important keyword."""
+        if not self.keep_important:
+            return False
+        return any(keyword in comment for keyword in self.important_keywords)
+
+    def is_pragma_comment(self, comment: str) -> bool:
+        """Return True if pragma comments are kept and this one has a #pragma/MARK marker."""
+        if not self.keep_pragma:
+            return False
+        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment
+
+    def should_keep_comment(self, comment: str) -> bool:
+        """Return True if any keep rule (copyright, important, pragma) matches the comment."""
+        return (self.is_copyright_comment(comment) or
+                self.is_important_comment(comment) or
+                self.is_pragma_comment(comment))
+
+    @staticmethod
+    def _has_code_before(result: list) -> bool:
+        """Return True if the output line being built already holds non-blank text."""
+        for piece in reversed(result):
+            if piece == '\n':
+                return False
+            if piece.strip():
+                return True
+        return False
+
+    def remove_comments(self, code: str, file_extension: str) -> str:
+        """
+        Remove ``//`` and ``/* */`` comments that no keep rule selects.
+
+        String and character literals are copied through untouched, as are
+        Swift multi-line strings when ``file_extension`` is ``.swift``.
+
+        Args:
+            code: source code text.
+            file_extension: file suffix, e.g. ``.swift`` or ``.m``.
+
+        Returns:
+            The code with the unwanted comments removed.
+        """
+        result = []
+        i = 0
+        in_string = False
+        string_char = None
+
+        while i < len(code):
+            # Outside any string or character literal.
+            if not in_string:
+                # Swift multi-line string; checked before plain quotes so '"""' is not split up.
+                if file_extension == '.swift' and code[i:i+3] == '"""':
+                    result.append('"""')
+                    i += 3
+                    # Copy the literal verbatim; a backslash-escaped char never closes it.
+                    while i < len(code):
+                        if code[i] == '\\' and i + 1 < len(code):
+                            result.extend(code[i:i+2])
+                            i += 2
+                            continue
+                        if code[i:i+3] == '"""':
+                            result.append('"""')
+                            i += 3
+                            break
+                        result.append(code[i])
+                        i += 1
+                    continue
+
+                # Start of a string or character literal.
+                # NOTE: "'" opens a literal for every file type, so a lone apostrophe
+                # outside a comment (e.g. in a #warning line) is read as the start
+                # of a literal.
+                if code[i] in ('"', "'"):
+                    string_char = code[i]
+                    in_string = True
+                    result.append(code[i])
+                    i += 1
+                    continue
+
+                # Single-line comment: //
+                if code[i:i+2] == '//':
+                    # Not a comment when it is the '//' of a URL (http://, https://).
+                    if i >= 5 and code[i-5:i+2] == 'http://':
+                        result.append(code[i])
+                        i += 1
+                        continue
+                    if i >= 6 and code[i-6:i+2] == 'https://':
+                        result.append(code[i])
+                        i += 1
+                        continue
+
+                    # A trailing comment shares its line with code emitted earlier.
+                    has_code_before = self._has_code_before(result)
+
+                    # Extract the comment text up to (not including) the newline.
+                    comment_start = i
+                    i += 2
+                    while i < len(code) and code[i] != '\n':
+                        i += 1
+
+                    comment = code[comment_start:i]
+
+                    # Keep or drop the comment.
+                    if self.should_keep_comment(comment):
+                        result.append(comment)
+                    elif has_code_before:
+                        # Trailing comment dropped: also strip the whitespace
+                        # that separated it from the code.
+                        while len(result) > 0 and result[-1] in (' ', '\t'):
+                            result.pop()
+
+                    # Preserve the line break.
+                    if i < len(code) and code[i] == '\n':
+                        result.append('\n')
+                        i += 1
+                    continue
+
+                # Block comment /* */ (including doc comments /** */).
+                if code[i:i+2] == '/*':
+                    comment_start = i
+                    i += 2
+
+                    # Find the end of the comment; an unterminated one runs to end of input.
+                    while i < len(code):
+                        if code[i:i+2] == '*/':
+                            i += 2
+                            break
+                        i += 1
+
+                    comment = code[comment_start:i]
+
+                    # Keep or drop the comment.
+                    if self.should_keep_comment(comment):
+                        result.append(comment)
+                    elif self._has_code_before(result):
+                        # Trailing block comment dropped: strip the whitespace before it.
+                        while len(result) > 0 and result[-1] in (' ', '\t'):
+                            result.pop()
+                    elif '\n' in comment:
+                        # Standalone multi-line comment: leave one newline as a paragraph break.
+                        result.append('\n')
+
+                    continue
+
+            else:
+                # Inside a string or character literal.
+                # Escape sequence: copy both characters so an escaped quote cannot end it.
+                if code[i] == '\\' and i + 1 < len(code):
+                    result.append(code[i])
+                    result.append(code[i + 1])
+                    i += 2
+                    continue
+
+                # Closing quote.
+                if code[i] == string_char:
+                    in_string = False
+                    string_char = None
+
+                result.append(code[i])
+                i += 1
+                continue
+
+            # Ordinary character.
+            result.append(code[i])
+            i += 1
+
+        return ''.join(result)
+
+    def clean_empty_lines(self, code: str) -> str:
+        """Collapse each run of more than two blank lines down to two."""
+        lines = code.split('\n')
+        result = []
+        empty_count = 0
+
+        for line in lines:
+            if line.strip() == '':
+                empty_count += 1
+                if empty_count <= 2:
+                    result.append(line)
+            else:
+                empty_count = 0
+                result.append(line)
+
+        return '\n'.join(result)
+
+
+class FileProcessor:
+    """
+    Run a comment remover over files on disk and keep running totals.
+
+    Honours two switches: ``backup`` (copy the file before overwriting it)
+    and ``dry_run`` (report only, never write).
+    """
+
+    # File suffixes this processor will touch.
+    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}
+
+    def __init__(self, remover: CommentRemover, backup: bool = True, dry_run: bool = False):
+        """
+        Set up the processor.
+
+        Args:
+            remover: object whose ``remove_comments`` and ``clean_empty_lines`` are applied.
+            backup: copy each file to ``<name>.backup`` before overwriting it.
+            dry_run: report what would change without writing anything.
+        """
+        self.remover = remover
+        self.backup = backup
+        self.dry_run = dry_run
+        # Running totals reported by print_summary().
+        self.stats = dict(processed=0, skipped=0, errors=0)
+
+    def is_supported_file(self, filepath: Path) -> bool:
+        """Return True when the file's suffix is one of SUPPORTED_EXTENSIONS."""
+        suffix = filepath.suffix
+        return suffix in self.SUPPORTED_EXTENSIONS
+
+    def process_file(self, filepath: Path) -> bool:
+        """
+        Strip comments from one file, honouring the backup and dry-run settings.
+
+        Args:
+            filepath: path of the file to process.
+
+        Returns:
+            False for an unsupported file or when an exception occurred,
+            True otherwise (including unchanged files and dry-run previews).
+        """
+        try:
+            # Guard: only known source-file suffixes are touched.
+            if not self.is_supported_file(filepath):
+                print(f"⚠️  跳过不支持的文件: {filepath}")
+                self.stats['skipped'] += 1
+                return False
+
+            with open(filepath, 'r', encoding='utf-8') as src:
+                source_text = src.read()
+
+            # Remove comments first, then squeeze the blank lines left behind.
+            stripped = self.remover.remove_comments(source_text, filepath.suffix)
+            new_text = self.remover.clean_empty_lines(stripped)
+
+            # Line counts before and after, for the report line.
+            before = len(source_text.splitlines())
+            after = len(new_text.splitlines())
+
+            if source_text == new_text:
+                print(f"✓ 无需修改: {filepath}")
+                self.stats['skipped'] += 1
+                return True
+
+            print(f"📝 {'[预览]' if self.dry_run else '处理'} {filepath}")
+            print(f"   删除 {before - after} 行注释 ({before} → {after} 行)")
+
+            if self.dry_run:
+                print(f"   [预览模式] 不会实际修改文件")
+                return True
+
+            if self.backup:
+                # The backup sits next to the original: name.ext -> name.ext.backup
+                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
+                shutil.copy2(filepath, backup_path)
+                print(f"   备份: {backup_path}")
+
+            # Overwrite the original in place.
+            with open(filepath, 'w', encoding='utf-8') as dst:
+                dst.write(new_text)
+
+            print(f"✅ 完成: {filepath}")
+            self.stats['processed'] += 1
+            return True
+
+        except Exception as exc:
+            # Any failure is reported and counted; the caller only sees False.
+            print(f"❌ 错误: {filepath}")
+            print(f"   {str(exc)}")
+            self.stats['errors'] += 1
+            return False
+
+    def process_directory(self, directory: Path, recursive: bool = True) -> None:
+        """
+        Process every supported file in a directory, then print the summary.
+
+        Args:
+            directory: directory to scan.
+            recursive: descend into subdirectories when True.
+        """
+        pattern = '**/*' if recursive else '*'
+        # One pass: keep regular files whose suffix is supported.
+        supported_files = [
+            entry for entry in directory.glob(pattern)
+            if entry.is_file() and self.is_supported_file(entry)
+        ]
+
+        print(f"\n🔍 找到 {len(supported_files)} 个支持的文件\n")
+
+        for filepath in supported_files:
+            self.process_file(filepath)
+
+        self.print_summary()
+
+    def print_summary(self) -> None:
+        """Print the processed / skipped / error totals between separator rules."""
+        stats = self.stats
+        rule = "=" * 60
+        print("\n" + rule)
+        print("📊 处理摘要")
+        print(rule)
+        print(f"✅ 已处理: {stats['processed']} 个文件")
+        print(f"⚠️  已跳过: {stats['skipped']} 个文件")
+        print(f"❌ 错误: {stats['errors']} 个文件")
+        print(rule + "\n")
+
+
+def main():
+    """Command-line entry point: parse arguments, print the configuration, run the tool."""
+    parser = argparse.ArgumentParser(
+        description='安全移除代码注释工具',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例:
+  # 处理单个文件
+  python remove_comments.py path/to/file.m
+  
+  # 处理整个目录
+  python remove_comments.py YuMi/Global/ -r
+  
+  # 预览模式（不实际修改）
+  python remove_comments.py YuMi/Global/ -r --dry-run
+  
+  # 不保留版权信息和重要注释
+  python remove_comments.py file.m --no-copyright --no-important
+  
+  # 不创建备份文件
+  python remove_comments.py file.m --no-backup
+        """
+    )
+
+    parser.add_argument('path', type=str, help='文件或目录路径')
+    # All remaining options are plain on/off switches, registered in help order.
+    switches = (
+        (('-r', '--recursive'), '递归处理子目录'),
+        (('--dry-run',), '预览模式（不实际修改文件）'),
+        (('--no-backup',), '不创建备份文件'),
+        (('--no-copyright',), '不保留版权声明'),
+        (('--no-important',), '不保留重要注释（TODO, FIXME等）'),
+        (('--no-pragma',), '不保留编译指令（#pragma mark等）'),
+    )
+    for flags, help_text in switches:
+        parser.add_argument(*flags, action='store_true', help=help_text)
+
+    args = parser.parse_args()
+
+    # The target must exist before anything else is set up.
+    path = Path(args.path)
+    if not path.exists():
+        print(f"❌ 路径不存在: {path}")
+        sys.exit(1)
+
+    # Each --no-* switch turns the matching keep/backup option off.
+    keep_copyright = not args.no_copyright
+    keep_important = not args.no_important
+    keep_pragma = not args.no_pragma
+    make_backup = not args.no_backup
+
+    remover = CommentRemover(
+        keep_copyright=keep_copyright, keep_important=keep_important, keep_pragma=keep_pragma
+    )
+    processor = FileProcessor(remover=remover, backup=make_backup, dry_run=args.dry_run)
+
+    def yes_no(flag):
+        return '是' if flag else '否'
+
+    # Configuration banner; yes_no() renders each boolean as 是/否.
+    rule = "=" * 60
+    print("\n" + rule)
+    print("⚙️  配置")
+    print(rule)
+    print(f"路径: {path}")
+    print(f"模式: {'预览' if args.dry_run else '实际修改'}")
+    print(f"备份: {yes_no(make_backup)}")
+    print(f"保留版权: {yes_no(keep_copyright)}")
+    print(f"保留重要注释: {yes_no(keep_important)}")
+    print(f"保留编译指令: {yes_no(keep_pragma)}")
+    print(rule)
+
+    # Dispatch on the kind of path; process_directory prints its own summary.
+    if path.is_file():
+        processor.process_file(path)
+        processor.print_summary()
+    else:
+        processor.process_directory(path, recursive=args.recursive)
+
+
+if __name__ == '__main__':
+    main()
+