452 lines
16 KiB
Python
Executable File
452 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
安全移除代码注释工具
|
||
支持 Objective-C, Swift, C/C++ 等语言
|
||
"""
|
||
|
||
import re
|
||
import os
|
||
import sys
|
||
import argparse
|
||
from pathlib import Path
|
||
from typing import List, Tuple, Optional
|
||
import shutil
|
||
|
||
class CommentRemover:
    """Remove ``//`` and ``/* */`` comments from C-family source text.

    Intended for Objective-C, Swift and C/C++ sources. Comments that look
    like copyright notices, "important" notes (TODO, FIXME, ...) or pragma /
    MARK markers can optionally be preserved. String and character literals
    are copied through untouched so comment markers inside them are ignored.
    """

    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
        """Configure which categories of comments survive removal.

        Args:
            keep_copyright: Keep comments containing a copyright keyword.
            keep_important: Keep comments containing TODO, FIXME, NOTE, etc.
            keep_pragma: Keep comments containing ``#pragma`` or MARK markers.
        """
        self.keep_copyright = keep_copyright
        self.keep_important = keep_important
        self.keep_pragma = keep_pragma

        # Case-sensitive substrings that mark a comment as "important".
        self.important_keywords = [
            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
        ]

        # Case-sensitive substrings that mark a comment as a copyright notice.
        self.copyright_keywords = [
            'Copyright', 'copyright', '©', 'LICENSE',
            'All rights reserved', 'Created by'
        ]

    def is_copyright_comment(self, comment: str) -> bool:
        """Return True if copyright comments are kept and *comment* has a copyright keyword."""
        if not self.keep_copyright:
            return False
        return any(keyword in comment for keyword in self.copyright_keywords)

    def is_important_comment(self, comment: str) -> bool:
        """Return True if important comments are kept and *comment* has an important keyword."""
        if not self.keep_important:
            return False
        return any(keyword in comment for keyword in self.important_keywords)

    def is_pragma_comment(self, comment: str) -> bool:
        """Return True if pragma comments are kept and *comment* has a pragma/MARK marker."""
        if not self.keep_pragma:
            return False
        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment

    def should_keep_comment(self, comment: str) -> bool:
        """Return True if *comment* matches any category configured to be preserved."""
        return (self.is_copyright_comment(comment) or
                self.is_important_comment(comment) or
                self.is_pragma_comment(comment))

    @staticmethod
    def _has_code_on_current_line(result) -> bool:
        """Return True if the unfinished last output line holds non-blank text."""
        for piece in reversed(result):
            if piece == '\n':
                return False
            if piece.strip():
                return True
        return False

    @staticmethod
    def _strip_trailing_blanks(result) -> None:
        """Drop spaces and tabs from the end of the output buffer, in place."""
        while result and result[-1] in (' ', '\t'):
            result.pop()

    @staticmethod
    def _is_identifier_char(ch: str) -> bool:
        """Return True if *ch* can be part of an identifier or number token."""
        return ch.isalnum() or ch == '_'

    def remove_comments(self, code: str, file_extension: str) -> str:
        """Return *code* with comments removed, except those selected to be kept.

        Args:
            code: Full source text.
            file_extension: File suffix including the dot. Only ``'.swift'``
                changes behaviour: it enables ``\"\"\"`` multi-line string
                handling.

        Returns:
            The source text with unwanted comments stripped. Trailing blanks
            before a removed end-of-line comment are dropped. A removed
            stand-alone multi-line block comment leaves a single newline.
        """
        result = []
        i = 0
        n = len(code)
        in_string = False
        string_char = None
        is_swift = file_extension == '.swift'

        while i < n:
            ch = code[i]

            if in_string:
                # Copy an escape sequence verbatim so an escaped quote
                # cannot terminate the literal.
                if ch == '\\' and i + 1 < n:
                    result.append(ch)
                    result.append(code[i + 1])
                    i += 2
                    continue
                if ch == string_char:
                    in_string = False
                    string_char = None
                result.append(ch)
                i += 1
                continue

            # Swift multi-line string literal. This must be tested before the
            # ordinary quote check below, otherwise the first '"' of the
            # delimiter is consumed as a one-line string and this branch can
            # never run.
            if is_swift and code.startswith('"""', i):
                j = i + 3
                while j < n and not code.startswith('"""', j):
                    # Step over escapes such as \""" so they do not close
                    # the literal.
                    j += 2 if code[j] == '\\' else 1
                # Include the closing delimiter when present; an
                # unterminated literal runs to the end of the input.
                j = min(j + 3, n)
                result.append(code[i:j])
                i = j
                continue

            # Start of an ordinary string or character literal.
            if ch in ('"', "'"):
                string_char = ch
                in_string = True
                result.append(ch)
                i += 1
                continue

            # Single-line comment: // ...
            if code.startswith('//', i):
                # Do not treat the slashes of a bare URL as a comment start.
                if i >= 5 and code[i - 5:i] == 'http:':
                    result.append(ch)
                    i += 1
                    continue
                if i >= 6 and code[i - 6:i] == 'https:':
                    result.append(ch)
                    i += 1
                    continue

                has_code_before = self._has_code_on_current_line(result)

                end = code.find('\n', i)
                if end == -1:
                    end = n
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif has_code_before:
                    # End-of-line comment: also drop the padding before it.
                    self._strip_trailing_blanks(result)

                # Keep the line break that ended the comment.
                if i < n:
                    result.append('\n')
                    i += 1
                continue

            # Block comment: /* ... */ (including /** ... */ doc comments).
            if code.startswith('/*', i):
                close = code.find('*/', i + 2)
                # An unterminated comment extends to the end of the input.
                end = n if close == -1 else close + 2
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif self._has_code_on_current_line(result):
                    # Inline comment after code: drop the padding before it.
                    self._strip_trailing_blanks(result)
                    # A comment separates tokens. If deleting it would fuse
                    # two identifier-like tokens ("int/* x */a" -> "inta"),
                    # leave one space in its place.
                    if (i < n
                            and self._is_identifier_char(result[-1][-1])
                            and self._is_identifier_char(code[i])):
                        result.append(' ')
                elif '\n' in comment:
                    # Stand-alone multi-line comment: keep one newline so
                    # the surrounding code stays visually separated.
                    result.append('\n')
                continue

            # Ordinary character.
            result.append(ch)
            i += 1

        return ''.join(result)

    def clean_empty_lines(self, code: str) -> str:
        """Collapse runs of more than two blank lines down to two.

        A line counts as blank when it is empty or whitespace-only.
        """
        result = []
        empty_count = 0

        for line in code.split('\n'):
            if line.strip() == '':
                empty_count += 1
                if empty_count <= 2:
                    result.append(line)
            else:
                empty_count = 0
                result.append(line)

        return '\n'.join(result)
|
||
|
||
|
||
class FileProcessor:
    """Apply a comment remover to individual files or whole directory trees.

    Tracks how many files were rewritten, skipped, or failed in ``stats``.
    """

    # File suffixes (lower-case, with the dot) that will be processed.
    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}

    def __init__(self, remover: "CommentRemover", backup: bool = True, dry_run: bool = False):
        """Store the processing options and reset the counters.

        Args:
            remover: Object providing ``remove_comments(code, suffix)`` and
                ``clean_empty_lines(code)``.
            backup: Copy each file to ``<name><suffix>.backup`` before
                overwriting it.
            dry_run: Report what would change without writing anything.
        """
        self.remover = remover
        self.backup = backup
        self.dry_run = dry_run
        self.stats = {
            'processed': 0,
            'skipped': 0,
            'errors': 0
        }

    def is_supported_file(self, filepath: Path) -> bool:
        """Return True if *filepath* has a supported suffix, ignoring case.

        The comparison is case-insensitive so names such as ``Foo.M`` or
        ``Bar.H`` are not silently skipped.
        """
        return filepath.suffix.lower() in self.SUPPORTED_EXTENSIONS

    def process_file(self, filepath: Path) -> bool:
        """Strip comments from one file.

        Args:
            filepath: Path of the file to process.

        Returns:
            True when the file was handled (rewritten, previewed, or already
            clean); False when it was unsupported or an error occurred.
            Errors are printed and counted, not raised.
        """
        try:
            if not self.is_supported_file(filepath):
                print(f"⚠️ 跳过不支持的文件: {filepath}")
                self.stats['skipped'] += 1
                return False

            with open(filepath, 'r', encoding='utf-8') as f:
                original_code = f.read()

            # Pass the normalised suffix so language-specific handling in the
            # remover (e.g. '.swift') also applies to upper-case names.
            cleaned_code = self.remover.remove_comments(
                original_code,
                filepath.suffix.lower()
            )
            cleaned_code = self.remover.clean_empty_lines(cleaned_code)

            original_lines = len(original_code.splitlines())
            cleaned_lines = len(cleaned_code.splitlines())
            removed_lines = original_lines - cleaned_lines

            if original_code == cleaned_code:
                print(f"✓ 无需修改: {filepath}")
                self.stats['skipped'] += 1
                return True

            print(f"📝 {'[预览]' if self.dry_run else '处理'} {filepath}")
            print(f" 删除 {removed_lines} 行注释 ({original_lines} → {cleaned_lines} 行)")

            if self.dry_run:
                # Preview only: nothing is written and no counter is updated.
                print(f" [预览模式] 不会实际修改文件")
                return True

            if self.backup:
                # Back up first so a failed write cannot lose the original.
                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
                shutil.copy2(filepath, backup_path)
                print(f" 备份: {backup_path}")

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(cleaned_code)

            print(f"✅ 完成: {filepath}")
            self.stats['processed'] += 1
            return True

        except Exception as e:
            # Deliberate catch-all at the per-file boundary: one unreadable
            # or undecodable file must not abort a whole directory run.
            print(f"❌ 错误: {filepath}")
            print(f" {e}")
            self.stats['errors'] += 1
            return False

    def process_directory(self, directory: Path, recursive: bool = True) -> None:
        """Process every supported file in *directory*, then print the summary.

        Args:
            directory: Directory to scan.
            recursive: Descend into sub-directories when True; otherwise only
                direct children are considered.
        """
        pattern = '**/*' if recursive else '*'

        # Sorted so the processing order and log output are reproducible.
        supported_files = sorted(
            f for f in directory.glob(pattern)
            if f.is_file() and self.is_supported_file(f)
        )

        print(f"\n🔍 找到 {len(supported_files)} 个支持的文件\n")

        for filepath in supported_files:
            self.process_file(filepath)

        self.print_summary()

    def print_summary(self) -> None:
        """Print the processed / skipped / error counters."""
        print("\n" + "=" * 60)
        print("📊 处理摘要")
        print("=" * 60)
        print(f"✅ 已处理: {self.stats['processed']} 个文件")
        print(f"⚠️ 已跳过: {self.stats['skipped']} 个文件")
        print(f"❌ 错误: {self.stats['errors']} 个文件")
        print("=" * 60 + "\n")
|
||
|
||
|
||
def main():
    """Command-line entry point: parse arguments, print the configuration, run.

    Exits with status 1 when the given path does not exist. A file path is
    processed on its own; any other existing path is handled as a directory.
    """
    parser = argparse.ArgumentParser(
        description='安全移除代码注释工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 处理单个文件
  python remove_comments.py path/to/file.m

  # 处理整个目录
  python remove_comments.py YuMi/Global/ -r

  # 预览模式(不实际修改)
  python remove_comments.py YuMi/Global/ -r --dry-run

  # 不保留版权信息和重要注释
  python remove_comments.py file.m --no-copyright --no-important

  # 不创建备份文件
  python remove_comments.py file.m --no-backup
"""
    )

    parser.add_argument('path', type=str, help='文件或目录路径')

    # All remaining options are plain on/off switches, registered in the
    # order they should appear in --help.
    switches = (
        (('-r', '--recursive'), '递归处理子目录'),
        (('--dry-run',), '预览模式(不实际修改文件)'),
        (('--no-backup',), '不创建备份文件'),
        (('--no-copyright',), '不保留版权声明'),
        (('--no-important',), '不保留重要注释(TODO, FIXME等)'),
        (('--no-pragma',), '不保留编译指令(#pragma mark等)'),
    )
    for flags, help_text in switches:
        parser.add_argument(*flags, action='store_true', help=help_text)

    args = parser.parse_args()

    # Validate the target before building anything else.
    path = Path(args.path)
    if not path.exists():
        print(f"❌ 路径不存在: {path}")
        sys.exit(1)

    # The CLI exposes negative switches; convert them to positive options.
    make_backup = not args.no_backup
    keep_copyright = not args.no_copyright
    keep_important = not args.no_important
    keep_pragma = not args.no_pragma

    remover = CommentRemover(
        keep_copyright=keep_copyright,
        keep_important=keep_important,
        keep_pragma=keep_pragma
    )
    processor = FileProcessor(
        remover=remover,
        backup=make_backup,
        dry_run=args.dry_run
    )

    def yes_no(flag):
        """Render a boolean as the yes/no label used in the banner."""
        return '是' if flag else '否'

    # Echo the effective configuration.
    rule = "=" * 60
    print("\n" + rule)
    print("⚙️ 配置")
    print(rule)
    print(f"路径: {path}")
    print(f"模式: {'预览' if args.dry_run else '实际修改'}")
    print(f"备份: {yes_no(make_backup)}")
    print(f"保留版权: {yes_no(keep_copyright)}")
    print(f"保留重要注释: {yes_no(keep_important)}")
    print(f"保留编译指令: {yes_no(keep_pragma)}")
    print(rule)

    # A directory run prints its own summary; a single file needs it here.
    if not path.is_file():
        processor.process_directory(path, recursive=args.recursive)
        return

    processor.process_file(path)
    processor.print_summary()
|
||
|
||
|
||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||
|