#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Safely strip comments from source files.

Supports Objective-C, Swift, C and C++ sources. Comments that carry a
copyright notice, an "important" tag (TODO, FIXME, ...) or a pragma/MARK
marker can optionally be preserved.
"""

import re
import os
import sys
import argparse
from pathlib import Path
from typing import List, Tuple, Optional
import shutil


class CommentRemover:
    """Remove ``//`` and ``/* */`` comments while leaving string literals intact."""

    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
        """
        Configure which categories of comments survive removal.

        Args:
            keep_copyright: Keep comments containing a copyright keyword.
            keep_important: Keep comments tagged TODO, FIXME, NOTE, etc.
            keep_pragma: Keep comments containing ``#pragma`` / ``MARK:`` markers.
        """
        self.keep_copyright = keep_copyright
        self.keep_important = keep_important
        self.keep_pragma = keep_pragma

        # Tags that mark a comment as important (matched as whole words).
        self.important_keywords = [
            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
        ]

        # Substrings that mark a comment as a copyright/licence header.
        self.copyright_keywords = [
            'Copyright', 'copyright', '©', 'LICENSE',
            'All rights reserved', 'Created by'
        ]

    def is_copyright_comment(self, comment: str) -> bool:
        """Return True if *comment* contains a copyright keyword and those are kept."""
        if not self.keep_copyright:
            return False
        return any(keyword in comment for keyword in self.copyright_keywords)

    def is_important_comment(self, comment: str) -> bool:
        """
        Return True if *comment* carries an important tag and those are kept.

        Tags are matched as whole ASCII words so that ``DEBUG`` does not count
        as ``BUG`` and ``BENCHMARK`` does not count as ``MARK``. Only ASCII
        identifier characters break a match, so a tag directly followed by
        non-ASCII text (e.g. CJK) is still recognised.
        """
        if not self.keep_important or not self.important_keywords:
            return False
        # Built per call so later edits to ``important_keywords`` are honoured;
        # the ``re`` module caches the compiled pattern.
        alternatives = '|'.join(re.escape(k) for k in self.important_keywords)
        pattern = rf'(?<![A-Za-z0-9_])(?:{alternatives})(?![A-Za-z0-9_])'
        return re.search(pattern, comment) is not None

    def is_pragma_comment(self, comment: str) -> bool:
        """Return True if *comment* contains a pragma/MARK marker and those are kept."""
        if not self.keep_pragma:
            return False
        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment

    def should_keep_comment(self, comment: str) -> bool:
        """Return True if any of the enabled keep-rules matches *comment*."""
        return (self.is_copyright_comment(comment) or
                self.is_important_comment(comment) or
                self.is_pragma_comment(comment))

    @staticmethod
    def _string_literal_end(code: str, start: int) -> int:
        """Return the index just past the quoted literal opening at *start*.

        An unescaped newline ends the scan (the newline itself is not
        consumed), so a stray quote cannot swallow the rest of the file.
        """
        quote = code[start]
        n = len(code)
        i = start + 1
        while i < n:
            ch = code[i]
            if ch == '\\' and i + 1 < n:
                i += 2  # skip the escaped character (including an escaped newline)
                continue
            if ch == quote:
                return i + 1
            if ch == '\n':
                return i
            i += 1
        return n

    @staticmethod
    def _multiline_string_end(code: str, start: int) -> int:
        """Return the index just past the triple-quoted literal opening at *start*."""
        n = len(code)
        i = start + 3
        while i < n:
            if code[i] == '\\' and i + 1 < n:
                i += 2
                continue
            if code.startswith('"""', i):
                return i + 3
            i += 1
        return n

    @staticmethod
    def _block_comment_end(code: str, start: int, nested: bool) -> int:
        """Return the index just past the ``/* */`` comment opening at *start*.

        With *nested* set, inner ``/*`` openers increase the depth so the
        comment ends at the matching ``*/``. An unterminated comment runs to
        the end of *code*.
        """
        n = len(code)
        depth = 1
        i = start + 2
        while i < n:
            if code.startswith('*/', i):
                depth -= 1
                i += 2
                if depth == 0:
                    return i
            elif nested and code.startswith('/*', i):
                depth += 1
                i += 2
            else:
                i += 1
        return n

    @staticmethod
    def _line_has_code(out: List[str]) -> bool:
        """Return True if the output's current (last) line has a non-blank character."""
        for ch in reversed(out):
            if ch == '\n':
                return False
            if not ch.isspace():
                return True
        return False

    @staticmethod
    def _strip_trailing_blanks(out: List[str]) -> None:
        """Drop spaces and tabs from the end of the output buffer, in place."""
        while out and out[-1] in (' ', '\t'):
            out.pop()

    @staticmethod
    def _is_ident_char(ch: str) -> bool:
        """Return True if *ch* can be part of an identifier or number."""
        return ch.isalnum() or ch == '_'

    def remove_comments(self, code: str, file_extension: str) -> str:
        """
        Remove comments from *code*.

        String and character literals are copied verbatim. Comments accepted
        by ``should_keep_comment`` are copied verbatim as well. For ``.swift``
        input, triple-quoted strings and nested block comments are recognised.

        Args:
            code: Source text (newlines are expected to be ``\\n``).
            file_extension: File suffix including the dot, e.g. ``'.swift'``.

        Returns:
            The source text with the removable comments stripped.
        """
        # ``out`` always holds single characters so that the line-based
        # helpers can scan it backwards reliably.
        out: List[str] = []
        n = len(code)
        is_swift = file_extension == '.swift'
        i = 0

        while i < n:
            ch = code[i]

            # Swift multi-line string: must be tested before the generic
            # quote check, otherwise its first quote would be consumed there.
            if is_swift and code.startswith('"""', i):
                end = self._multiline_string_end(code, i)
                out.extend(code[i:end])
                i = end
                continue

            # Ordinary string / character literal.
            if ch in ('"', "'"):
                end = self._string_literal_end(code, i)
                out.extend(code[i:end])
                i = end
                continue

            # Line comment.
            if code.startswith('//', i):
                # The slashes of a bare URL scheme are not a comment.
                if i >= 5 and code[i-5:i+2] == 'http://':
                    out.append(ch)
                    i += 1
                    continue
                if i >= 6 and code[i-6:i+2] == 'https://':
                    out.append(ch)
                    i += 1
                    continue

                end = code.find('\n', i)
                if end == -1:
                    end = n
                comment = code[i:end]

                if self.should_keep_comment(comment):
                    out.extend(comment)
                else:
                    # Nothing can follow a line comment on its line, so the
                    # blanks before it (alignment padding or the indentation
                    # of a standalone comment) are now trailing whitespace.
                    self._strip_trailing_blanks(out)

                # The terminating newline, if any, is copied by the default
                # branch on the next iteration.
                i = end
                continue

            # Block comment (including /** doc comments).
            if code.startswith('/*', i):
                end = self._block_comment_end(code, i, nested=is_swift)
                comment = code[i:end]

                if self.should_keep_comment(comment):
                    out.extend(comment)
                elif self._line_has_code(out):
                    # Trailing/inline comment: drop the padding before it.
                    self._strip_trailing_blanks(out)
                    # Keep adjacent tokens apart, e.g. ``int/* c */a``.
                    if (out and end < n
                            and self._is_ident_char(out[-1])
                            and self._is_ident_char(code[end])):
                        out.append(' ')
                elif '\n' in comment:
                    # Standalone multi-line comment: leave a single newline
                    # so the surrounding code keeps its paragraph break.
                    out.append('\n')

                i = end
                continue

            # Plain character.
            out.append(ch)
            i += 1

        return ''.join(out)

    def clean_empty_lines(self, code: str) -> str:
        """Collapse every run of more than two blank lines down to two."""
        kept: List[str] = []
        blank_run = 0

        for line in code.split('\n'):
            if line.strip():
                blank_run = 0
                kept.append(line)
                continue
            blank_run += 1
            if blank_run <= 2:
                kept.append(line)

        return '\n'.join(kept)


class FileProcessor:
    """Apply a ``CommentRemover`` to files on disk and collect statistics."""

    # File suffixes that are processed; everything else is skipped.
    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}

    def __init__(self, remover: CommentRemover, backup: bool = True, dry_run: bool = False):
        """
        Create a processor.

        Args:
            remover: Comment remover used on every file.
            backup: Copy each file to ``<name>.backup`` before rewriting it.
            dry_run: Report what would change without touching any file.
        """
        self.remover = remover
        self.backup = backup
        self.dry_run = dry_run
        self.stats = {
            'processed': 0,
            'skipped': 0,
            'errors': 0
        }

    def is_supported_file(self, filepath: Path) -> bool:
        """Return True if the suffix of *filepath* is in ``SUPPORTED_EXTENSIONS``."""
        return filepath.suffix in self.SUPPORTED_EXTENSIONS

    def process_file(self, filepath: Path) -> bool:
        """
        Strip comments from a single file.

        The file's line-ending style (CRLF vs LF) is detected and restored on
        write so that only comment removal shows up in a diff.

        Args:
            filepath: File to process.

        Returns:
            True if the file was handled (rewritten, previewed or already
            clean); False if it was unsupported or an error occurred.
        """
        try:
            if not self.is_supported_file(filepath):
                print(f"⚠️ 跳过不支持的文件: {filepath}")
                self.stats['skipped'] += 1
                return False

            # newline='' disables newline translation so the original
            # line-ending style can be detected.
            with open(filepath, 'r', encoding='utf-8', newline='') as f:
                raw_code = f.read()
            eol = '\r\n' if '\r\n' in raw_code else '\n'
            # The remover only understands '\n' line breaks.
            original_code = raw_code.replace('\r\n', '\n').replace('\r', '\n')

            cleaned_code = self.remover.remove_comments(
                original_code,
                filepath.suffix
            )
            cleaned_code = self.remover.clean_empty_lines(cleaned_code)

            original_lines = len(original_code.splitlines())
            cleaned_lines = len(cleaned_code.splitlines())
            removed_lines = original_lines - cleaned_lines

            if original_code == cleaned_code:
                print(f"✓ 无需修改: {filepath}")
                self.stats['skipped'] += 1
                return True

            print(f"📝 {'[预览]' if self.dry_run else '处理'} {filepath}")
            print(f" 删除 {removed_lines} 行注释 ({original_lines} → {cleaned_lines} 行)")

            if self.dry_run:
                print(" [预览模式] 不会实际修改文件")
                return True

            if self.backup:
                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
                shutil.copy2(filepath, backup_path)
                print(f" 备份: {backup_path}")

            if eol != '\n':
                cleaned_code = cleaned_code.replace('\n', eol)
            # newline='' writes the text exactly as prepared above.
            with open(filepath, 'w', encoding='utf-8', newline='') as f:
                f.write(cleaned_code)

            print(f"✅ 完成: {filepath}")
            self.stats['processed'] += 1
            return True

        except Exception as e:
            # Per-file boundary: report and count the failure so that one bad
            # file does not abort a whole directory run.
            print(f"❌ 错误: {filepath}")
            print(f" {e}")
            self.stats['errors'] += 1
            return False

    def process_directory(self, directory: Path, recursive: bool = True) -> None:
        """
        Process every supported file in *directory*, then print a summary.

        Args:
            directory: Directory to scan.
            recursive: Also descend into sub-directories.
        """
        pattern = '**/*' if recursive else '*'

        # Sorted so that runs are reproducible across file systems.
        supported_files = sorted(
            f for f in directory.glob(pattern)
            if f.is_file() and self.is_supported_file(f)
        )

        print(f"\n🔍 找到 {len(supported_files)} 个支持的文件\n")

        for filepath in supported_files:
            self.process_file(filepath)

        self.print_summary()

    def print_summary(self) -> None:
        """Print the processed / skipped / error counters."""
        print("\n" + "=" * 60)
        print("📊 处理摘要")
        print("=" * 60)
        print(f"✅ 已处理: {self.stats['processed']} 个文件")
        print(f"⚠️ 已跳过: {self.stats['skipped']} 个文件")
        print(f"❌ 错误: {self.stats['errors']} 个文件")
        print("=" * 60 + "\n")


def main(argv: Optional[List[str]] = None):
    """
    Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='安全移除代码注释工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 处理单个文件
  python remove_comments.py path/to/file.m

  # 处理整个目录
  python remove_comments.py YuMi/Global/ -r

  # 预览模式(不实际修改)
  python remove_comments.py YuMi/Global/ -r --dry-run

  # 不保留版权信息和重要注释
  python remove_comments.py file.m --no-copyright --no-important

  # 不创建备份文件
  python remove_comments.py file.m --no-backup
        """
    )

    parser.add_argument('path', type=str, help='文件或目录路径')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='递归处理子目录')
    parser.add_argument('--dry-run', action='store_true',
                        help='预览模式(不实际修改文件)')
    parser.add_argument('--no-backup', action='store_true',
                        help='不创建备份文件')
    parser.add_argument('--no-copyright', action='store_true',
                        help='不保留版权声明')
    parser.add_argument('--no-important', action='store_true',
                        help='不保留重要注释(TODO, FIXME等)')
    parser.add_argument('--no-pragma', action='store_true',
                        help='不保留编译指令(#pragma mark等)')

    args = parser.parse_args(argv)

    path = Path(args.path)
    if not path.exists():
        print(f"❌ 路径不存在: {path}")
        sys.exit(1)

    remover = CommentRemover(
        keep_copyright=not args.no_copyright,
        keep_important=not args.no_important,
        keep_pragma=not args.no_pragma
    )

    processor = FileProcessor(
        remover=remover,
        backup=not args.no_backup,
        dry_run=args.dry_run
    )

    # Echo the effective configuration before doing any work.
    print("\n" + "=" * 60)
    print("⚙️ 配置")
    print("=" * 60)
    print(f"路径: {path}")
    print(f"模式: {'预览' if args.dry_run else '实际修改'}")
    print(f"备份: {'是' if not args.no_backup else '否'}")
    print(f"保留版权: {'是' if not args.no_copyright else '否'}")
    print(f"保留重要注释: {'是' if not args.no_important else '否'}")
    print(f"保留编译指令: {'是' if not args.no_pragma else '否'}")
    print("=" * 60)

    if path.is_file():
        processor.process_file(path)
        processor.print_summary()
    else:
        processor.process_directory(path, recursive=args.recursive)


if __name__ == '__main__':
    main()