keep edit
This commit is contained in:
451
remove_comments.py
Executable file
451
remove_comments.py
Executable file
@@ -0,0 +1,451 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
安全移除代码注释工具
|
||||
支持 Objective-C, Swift, C/C++ 等语言
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
import shutil
|
||||
|
||||
class CommentRemover:
    """Strip ``//`` and ``/* */`` comments from C-family source text.

    Comments whose text contains a copyright marker, an "important" keyword
    (TODO, FIXME, ...) or a pragma/MARK marker can optionally be preserved.
    Keyword matching is a plain case-sensitive substring test.
    """

    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
        """
        Configure which categories of comments survive removal.

        Args:
            keep_copyright: keep comments containing a copyright keyword.
            keep_important: keep comments containing TODO, FIXME, NOTE, etc.
            keep_pragma: keep comments containing ``#pragma`` / ``MARK`` markers.
        """
        self.keep_copyright = keep_copyright
        self.keep_important = keep_important
        self.keep_pragma = keep_pragma

        # Substrings that mark a comment as "important".
        self.important_keywords = [
            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
        ]

        # Substrings that mark a comment as a copyright / authorship notice.
        self.copyright_keywords = [
            'Copyright', 'copyright', '©', 'LICENSE',
            'All rights reserved', 'Created by'
        ]

    def is_copyright_comment(self, comment: str) -> bool:
        """Return True if copyright retention is on and *comment* contains a copyright keyword."""
        if not self.keep_copyright:
            return False
        return any(keyword in comment for keyword in self.copyright_keywords)

    def is_important_comment(self, comment: str) -> bool:
        """Return True if important-comment retention is on and *comment* contains an important keyword."""
        if not self.keep_important:
            return False
        return any(keyword in comment for keyword in self.important_keywords)

    def is_pragma_comment(self, comment: str) -> bool:
        """Return True if pragma retention is on and *comment* contains ``#pragma``, ``MARK:`` or ``MARK -``."""
        if not self.keep_pragma:
            return False
        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment

    def should_keep_comment(self, comment: str) -> bool:
        """Return True if any of the three retention rules matches *comment*."""
        return (self.is_copyright_comment(comment) or
                self.is_important_comment(comment) or
                self.is_pragma_comment(comment))

    @staticmethod
    def _has_code_before(result) -> bool:
        """Return True if the output line currently being built holds non-blank text.

        ``result`` must contain single characters only, so that every newline
        is visible to this backwards scan.
        """
        for ch in reversed(result):
            if ch == '\n':
                break
            if not ch.isspace():
                return True
        return False

    @staticmethod
    def _strip_trailing_blanks(result) -> None:
        """Pop trailing spaces and tabs from the end of the output buffer."""
        while result and result[-1] in (' ', '\t'):
            result.pop()

    @staticmethod
    def _copy_swift_multiline_string(code: str, start: int, result) -> int:
        """Copy a Swift triple-quoted string starting at *start* verbatim.

        Returns the index just past the closing delimiter, or ``len(code)``
        if the literal is never closed.
        """
        result.extend('"""')
        length = len(code)
        i = start + 3
        while i < length:
            # Copy a backslash together with the character it escapes, so an
            # escaped quote cannot be mistaken for part of the closing delimiter.
            if code[i] == '\\' and i + 1 < length:
                result.append(code[i])
                result.append(code[i + 1])
                i += 2
                continue
            if code.startswith('"""', i):
                result.extend('"""')
                return i + 3
            result.append(code[i])
            i += 1
        return i

    def remove_comments(self, code: str, file_extension: str) -> str:
        """
        Remove comments from *code* while leaving string literals untouched.

        Both ``"`` and ``'`` are treated as literal delimiters with backslash
        escapes. For ``.swift`` files a triple-quoted multi-line string is
        copied verbatim. Comments accepted by ``should_keep_comment`` are
        preserved unchanged.

        Args:
            code: source text to process.
            file_extension: file suffix including the dot; only ``'.swift'``
                changes behaviour (multi-line string handling).

        Returns:
            The source text with the non-retained comments removed.
        """
        # The buffer holds single characters only; _has_code_before relies on it.
        result = []
        length = len(code)
        is_swift = file_extension == '.swift'
        in_string = False
        string_char = None
        i = 0

        while i < length:
            ch = code[i]

            if in_string:
                # An escape sequence is copied as a pair so that an escaped
                # quote does not terminate the literal.
                if ch == '\\' and i + 1 < length:
                    result.append(ch)
                    result.append(code[i + 1])
                    i += 2
                    continue
                if ch == string_char:
                    in_string = False
                    string_char = None
                result.append(ch)
                i += 1
                continue

            # The triple-quote test must run before the single-quote test,
            # otherwise the first quote of the delimiter is consumed as an
            # ordinary string opener and this branch can never fire.
            if is_swift and code.startswith('"""', i):
                i = self._copy_swift_multiline_string(code, i, result)
                continue

            if ch in ('"', "'"):
                string_char = ch
                in_string = True
                result.append(ch)
                i += 1
                continue

            if code.startswith('//', i):
                # "//" that is part of a URL scheme is not a comment start.
                if i >= 5 and code[i - 5:i + 2] == 'http://':
                    result.append(ch)
                    i += 1
                    continue
                if i >= 6 and code[i - 6:i + 2] == 'https://':
                    result.append(ch)
                    i += 1
                    continue

                has_code_before = self._has_code_before(result)

                # A line comment runs up to, but not including, the newline.
                end = code.find('\n', i + 2)
                if end == -1:
                    end = length
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.extend(comment)
                elif has_code_before:
                    # Trailing comment: also drop the blanks that separated it
                    # from the code.
                    self._strip_trailing_blanks(result)

                # Keep the line break itself.
                if i < length:
                    result.append('\n')
                    i += 1
                continue

            if code.startswith('/*', i):
                # Search from i + 2 so "/*/" does not close itself; an
                # unterminated comment extends to the end of the input.
                end = code.find('*/', i + 2)
                end = length if end == -1 else end + 2
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.extend(comment)
                elif self._has_code_before(result):
                    self._strip_trailing_blanks(result)
                elif '\n' in comment:
                    # Stand-alone multi-line comment: leave one line break so
                    # the surrounding code stays visually separated.
                    result.append('\n')
                continue

            # Ordinary character.
            result.append(ch)
            i += 1

        return ''.join(result)

    def clean_empty_lines(self, code: str) -> str:
        """Collapse every run of more than two blank lines down to two.

        A line counts as blank when it is empty or whitespace-only.
        """
        kept = []
        blank_run = 0
        for line in code.split('\n'):
            if line.strip():
                blank_run = 0
                kept.append(line)
                continue
            blank_run += 1
            if blank_run <= 2:
                kept.append(line)
        return '\n'.join(kept)
|
||||
|
||||
|
||||
class FileProcessor:
    """Run a comment remover over files on disk and tally the outcomes."""

    # File suffixes that are eligible for processing.
    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}

    def __init__(self, remover: CommentRemover, backup: bool = True, dry_run: bool = False):
        """
        Store the collaborators and reset the counters.

        Args:
            remover: object providing ``remove_comments`` and ``clean_empty_lines``.
            backup: copy the original to ``<name>.backup`` before overwriting it.
            dry_run: report what would change without touching any file.
        """
        self.remover = remover
        self.backup = backup
        self.dry_run = dry_run
        # Counters reported by print_summary.
        self.stats = dict.fromkeys(('processed', 'skipped', 'errors'), 0)

    def is_supported_file(self, filepath: Path) -> bool:
        """Return True when the file's suffix is in SUPPORTED_EXTENSIONS."""
        return filepath.suffix in self.SUPPORTED_EXTENSIONS

    def process_file(self, filepath: Path) -> bool:
        """
        Strip comments from one file, honouring the backup and dry-run settings.

        Args:
            filepath: path of the file to process.

        Returns:
            True if the file was rewritten, previewed, or needed no change;
            False if it was unsupported or an exception occurred.
        """
        try:
            if not self.is_supported_file(filepath):
                print(f"⚠️  跳过不支持的文件: {filepath}")
                self.stats['skipped'] += 1
                return False

            before = filepath.read_text(encoding='utf-8')
            stripped = self.remover.remove_comments(before, filepath.suffix)
            after = self.remover.clean_empty_lines(stripped)

            # Unchanged files are counted as skipped.
            if after == before:
                print(f"✓ 无需修改: {filepath}")
                self.stats['skipped'] += 1
                return True

            lines_before = len(before.splitlines())
            lines_after = len(after.splitlines())
            label = '[预览]' if self.dry_run else '处理'
            print(f"📝 {label} {filepath}")
            print(f"   删除 {lines_before - lines_after} 行注释 ({lines_before} → {lines_after} 行)")

            if self.dry_run:
                # Preview mode stops before any write and updates no counter.
                print("   [预览模式] 不会实际修改文件")
                return True

            if self.backup:
                # The backup sits next to the original with ".backup" appended.
                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
                shutil.copy2(filepath, backup_path)
                print(f"   备份: {backup_path}")

            filepath.write_text(after, encoding='utf-8')

            print(f"✅ 完成: {filepath}")
            self.stats['processed'] += 1
            return True

        except Exception as exc:
            # Any failure is reported and counted rather than raised.
            print(f"❌ 错误: {filepath}")
            print(f"   {exc!s}")
            self.stats['errors'] += 1
            return False

    def process_directory(self, directory: Path, recursive: bool = True) -> None:
        """
        Process every supported file under *directory*, then print the summary.

        Args:
            directory: directory to scan.
            recursive: descend into sub-directories when True.
        """
        pattern = '**/*' if recursive else '*'
        targets = [
            entry for entry in directory.glob(pattern)
            if entry.is_file() and self.is_supported_file(entry)
        ]

        print(f"\n🔍 找到 {len(targets)} 个支持的文件\n")

        for target in targets:
            self.process_file(target)

        self.print_summary()

    def print_summary(self) -> None:
        """Print the processed / skipped / error counters."""
        rule = "=" * 60
        print("\n" + rule)
        print("📊 处理摘要")
        print(rule)
        print(f"✅ 已处理: {self.stats['processed']} 个文件")
        print(f"⚠️  已跳过: {self.stats['skipped']} 个文件")
        print(f"❌ 错误: {self.stats['errors']} 个文件")
        print(rule + "\n")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, print the configuration, run the processor."""
    parser = argparse.ArgumentParser(
        description='安全移除代码注释工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 处理单个文件
python remove_comments.py path/to/file.m

# 处理整个目录
python remove_comments.py YuMi/Global/ -r

# 预览模式(不实际修改)
python remove_comments.py YuMi/Global/ -r --dry-run

# 不保留版权信息和重要注释
python remove_comments.py file.m --no-copyright --no-important

# 不创建备份文件
python remove_comments.py file.m --no-backup
"""
    )

    parser.add_argument('path', type=str, help='文件或目录路径')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='递归处理子目录')
    # The remaining options are plain on/off switches.
    switches = (
        ('--dry-run', '预览模式(不实际修改文件)'),
        ('--no-backup', '不创建备份文件'),
        ('--no-copyright', '不保留版权声明'),
        ('--no-important', '不保留重要注释(TODO, FIXME等)'),
        ('--no-pragma', '不保留编译指令(#pragma mark等)'),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    args = parser.parse_args()

    # Abort early when the target does not exist.
    target = Path(args.path)
    if not target.exists():
        print(f"❌ 路径不存在: {target}")
        sys.exit(1)

    # The CLI exposes negative switches; the classes take positive flags.
    make_backup = not args.no_backup
    keep_copyright = not args.no_copyright
    keep_important = not args.no_important
    keep_pragma = not args.no_pragma

    remover = CommentRemover(
        keep_copyright=keep_copyright,
        keep_important=keep_important,
        keep_pragma=keep_pragma
    )
    processor = FileProcessor(
        remover=remover,
        backup=make_backup,
        dry_run=args.dry_run
    )

    def yes_no(flag):
        return '是' if flag else '否'

    # Echo the effective configuration.
    rule = "=" * 60
    print("\n" + rule)
    print("⚙️  配置")
    print(rule)
    print(f"路径: {target}")
    print(f"模式: {'预览' if args.dry_run else '实际修改'}")
    print(f"备份: {yes_no(make_backup)}")
    print(f"保留版权: {yes_no(keep_copyright)}")
    print(f"保留重要注释: {yes_no(keep_important)}")
    print(f"保留编译指令: {yes_no(keep_pragma)}")
    print(rule)

    # process_directory prints its own summary; a single file needs an explicit one.
    if not target.is_file():
        processor.process_directory(target, recursive=args.recursive)
        return
    processor.process_file(target)
    processor.print_summary()
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
|
Reference in New Issue
Block a user