Files
real-e-party-iOS/remove_comments.py
2025-10-17 14:52:29 +08:00

452 lines
16 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
安全移除代码注释工具
支持 Objective-C, Swift, C/C++ 等语言
"""
import re
import os
import sys
import argparse
from pathlib import Path
from typing import List, Tuple, Optional
import shutil
class CommentRemover:
    """Strip ``//`` and ``/* */`` comments from C-family source text.

    The scanner is a hand-written character loop (not a real lexer). It
    tracks quoted string/char literals so comment markers inside them are
    left alone, and it can be told to keep copyright notices, "important"
    marker comments (TODO, FIXME, ...) and pragma/MARK comments.
    """

    # Characters that could fuse into a different operator (e.g. ``+`` ``+``)
    # if the comment between them were deleted without leaving a space.
    _OPERATOR_CHARS = frozenset('+-*/%&|^<>=!:.')

    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
        """Configure which categories of comment survive removal.

        Args:
            keep_copyright: Keep comments containing a copyright keyword.
            keep_important: Keep comments containing TODO, FIXME, NOTE, etc.
            keep_pragma: Keep comments containing ``#pragma`` / ``MARK``.
        """
        self.keep_copyright = keep_copyright
        self.keep_important = keep_important
        self.keep_pragma = keep_pragma
        # Substrings that mark a comment as "important".
        self.important_keywords = [
            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
        ]
        # Substrings that mark a comment as a copyright/licence header.
        self.copyright_keywords = [
            'Copyright', 'copyright', '©', 'LICENSE',
            'All rights reserved', 'Created by'
        ]

    def is_copyright_comment(self, comment: str) -> bool:
        """Return True if *comment* contains a copyright keyword and such comments are kept."""
        if not self.keep_copyright:
            return False
        return any(keyword in comment for keyword in self.copyright_keywords)

    def is_important_comment(self, comment: str) -> bool:
        """Return True if *comment* contains an important keyword and such comments are kept.

        Matching is by plain substring, so e.g. ``DEBUG`` matches ``BUG``.
        """
        if not self.keep_important:
            return False
        return any(keyword in comment for keyword in self.important_keywords)

    def is_pragma_comment(self, comment: str) -> bool:
        """Return True if *comment* is a pragma/MARK comment and such comments are kept."""
        if not self.keep_pragma:
            return False
        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment

    def should_keep_comment(self, comment: str) -> bool:
        """Return True if any enabled keep-rule matches *comment*."""
        return (self.is_copyright_comment(comment) or
                self.is_important_comment(comment) or
                self.is_pragma_comment(comment))

    @staticmethod
    def _line_has_content(pieces: List[str]) -> bool:
        """Return True if the output line currently being built has any non-blank text.

        *pieces* may contain multi-character entries (kept comments, whole
        string literals), so each piece is searched for its last newline
        rather than compared to ``'\\n'`` as a whole.
        """
        for piece in reversed(pieces):
            newline_at = piece.rfind('\n')
            tail = piece if newline_at < 0 else piece[newline_at + 1:]
            if tail.strip():
                return True
            if newline_at >= 0:
                return False
        return False

    @staticmethod
    def _strip_trailing_blanks(pieces: List[str]) -> None:
        """Pop trailing space/tab entries from *pieces* in place."""
        while pieces and pieces[-1] in (' ', '\t'):
            pieces.pop()

    @classmethod
    def _needs_separator(cls, before: str, after: str) -> bool:
        """Return True if *before* and *after* would fuse into one token when adjacent."""
        def is_word(ch: str) -> bool:
            return ch.isalnum() or ch == '_'

        if is_word(before) and is_word(after):
            return True
        return before in cls._OPERATOR_CHARS and after in cls._OPERATOR_CHARS

    @staticmethod
    def _swift_multiline_end(code: str, start: int) -> int:
        """Return the index just past the multi-line string literal opening at *start*.

        A backslash escapes the following character, so an escaped quote
        does not count toward the closing delimiter. If the literal is never
        closed, the end of *code* is returned.
        """
        length = len(code)
        pos = start + 3
        while pos < length:
            if code[pos] == '\\' and pos + 1 < length:
                pos += 2
                continue
            if code.startswith('"""', pos):
                return pos + 3
            pos += 1
        return length

    def remove_comments(self, code: str, file_extension: str) -> str:
        """Return *code* with comments removed, honouring the keep-rules.

        Args:
            code: Source text to process.
            file_extension: File suffix including the dot; ``'.swift'``
                enables handling of triple-quoted multi-line strings.

        Returns:
            The source text with non-kept comments removed. A removed
            trailing comment also loses the blanks before it; a removed
            stand-alone multi-line block comment leaves one newline behind.
        """
        result: List[str] = []
        length = len(code)
        i = 0
        in_string = False
        string_char: Optional[str] = None

        while i < length:
            ch = code[i]

            # Inside a quoted literal: copy verbatim until the matching quote.
            if in_string:
                if ch == '\\' and i + 1 < length:
                    # Copy the escape pair so an escaped quote cannot close the literal.
                    result.append(ch)
                    result.append(code[i + 1])
                    i += 2
                    continue
                if ch == string_char:
                    in_string = False
                    string_char = None
                result.append(ch)
                i += 1
                continue

            # Swift multi-line string. This must be tested before the
            # single-quote case below, otherwise the first quote of the
            # delimiter is consumed as an ordinary string start.
            if file_extension == '.swift' and code.startswith('"""', i):
                end = self._swift_multiline_end(code, i)
                result.append(code[i:end])
                i = end
                continue

            # Start of an ordinary string or character literal.
            if ch in ('"', "'"):
                string_char = ch
                in_string = True
                result.append(ch)
                i += 1
                continue

            # Single-line comment.
            if code.startswith('//', i):
                # A "//" that is part of a bare URL scheme is not a comment.
                is_url = ((i >= 5 and code.startswith('http://', i - 5)) or
                          (i >= 6 and code.startswith('https://', i - 6)))
                if is_url:
                    result.append(ch)
                    i += 1
                    continue

                has_code_before = self._line_has_content(result)
                end = code.find('\n', i)
                if end == -1:
                    end = length
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif has_code_before:
                    # Trailing comment: also drop the blanks that preceded it.
                    self._strip_trailing_blanks(result)
                # Keep the line terminator itself.
                if i < length:
                    result.append('\n')
                    i += 1
                continue

            # Block comment (including /** ... */ doc comments).
            if code.startswith('/*', i):
                close = code.find('*/', i + 2)
                end = length if close == -1 else close + 2
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif self._line_has_content(result):
                    self._strip_trailing_blanks(result)
                    # If code follows directly, deleting the comment must
                    # not glue two tokens together (e.g. "int/* c */x").
                    if (i < length and result
                            and self._needs_separator(result[-1][-1], code[i])):
                        result.append(' ')
                elif '\n' in comment:
                    # Stand-alone multi-line comment: leave one newline so
                    # the surrounding code keeps its paragraph break.
                    result.append('\n')
                continue

            # Ordinary character.
            result.append(ch)
            i += 1

        return ''.join(result)

    def clean_empty_lines(self, code: str) -> str:
        """Collapse every run of more than two blank lines down to two."""
        kept: List[str] = []
        blank_run = 0
        for line in code.split('\n'):
            if line.strip():
                blank_run = 0
                kept.append(line)
                continue
            blank_run += 1
            if blank_run <= 2:
                kept.append(line)
        return '\n'.join(kept)
class FileProcessor:
    """Apply a comment remover to individual files or whole directories.

    Keeps running counters in ``self.stats`` under the keys ``'processed'``,
    ``'skipped'`` and ``'errors'``.
    """

    # File suffixes (lower-case, including the dot) that will be processed.
    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}

    def __init__(self, remover: "CommentRemover", backup: bool = True, dry_run: bool = False):
        """Store the configuration and reset the counters.

        Args:
            remover: Object providing ``remove_comments(code, ext)`` and
                ``clean_empty_lines(code)``.
            backup: Copy each file to ``<name><suffix>.backup`` before
                overwriting it.
            dry_run: Report what would change without writing anything.
        """
        self.remover = remover
        self.backup = backup
        self.dry_run = dry_run
        self.stats = {
            'processed': 0,
            'skipped': 0,
            'errors': 0
        }

    def is_supported_file(self, filepath: Path) -> bool:
        """Return True if *filepath* has a supported suffix (case-insensitive)."""
        return filepath.suffix.lower() in self.SUPPORTED_EXTENSIONS

    def process_file(self, filepath: Path) -> bool:
        """Remove comments from one file and update the counters.

        Args:
            filepath: File to process.

        Returns:
            True when the file was handled (rewritten, previewed, or already
            clean); False when it is unsupported or an error occurred.
        """
        try:
            if not self.is_supported_file(filepath):
                print(f"⚠️ 跳过不支持的文件: {filepath}")
                self.stats['skipped'] += 1
                return False

            with open(filepath, 'r', encoding='utf-8') as f:
                original_code = f.read()

            # Lower-case the suffix so language-specific handling also
            # applies to files such as "View.SWIFT".
            cleaned_code = self.remover.remove_comments(
                original_code,
                filepath.suffix.lower()
            )
            cleaned_code = self.remover.clean_empty_lines(cleaned_code)

            original_lines = len(original_code.splitlines())
            cleaned_lines = len(cleaned_code.splitlines())
            removed_lines = original_lines - cleaned_lines

            if original_code == cleaned_code:
                print(f"✓ 无需修改: {filepath}")
                self.stats['skipped'] += 1
                return True

            print(f"📝 {'[预览]' if self.dry_run else '处理'} {filepath}")
            # Separate the two counts; printed back-to-back they are unreadable.
            print(f" 删除 {removed_lines} 行注释 ({original_lines} → {cleaned_lines} 行)")

            if self.dry_run:
                print(f" [预览模式] 不会实际修改文件")
                return True

            if self.backup:
                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
                shutil.copy2(filepath, backup_path)
                print(f" 备份: {backup_path}")

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(cleaned_code)

            print(f"✅ 完成: {filepath}")
            self.stats['processed'] += 1
            return True
        except Exception as e:
            # Per-file boundary: report and count the failure so one bad
            # file does not abort a whole directory run.
            print(f"❌ 错误: {filepath}")
            print(f" {str(e)}")
            self.stats['errors'] += 1
            return False

    def process_directory(self, directory: Path, recursive: bool = True) -> None:
        """Process every supported file under *directory*, then print the summary.

        Args:
            directory: Directory to scan.
            recursive: Descend into sub-directories when True.
        """
        pattern = '**/*' if recursive else '*'
        # Sorted so the processing order (and therefore the log) is deterministic.
        supported_files = sorted(
            f for f in directory.glob(pattern)
            if f.is_file() and self.is_supported_file(f)
        )
        print(f"\n🔍 找到 {len(supported_files)} 个支持的文件\n")
        for filepath in supported_files:
            self.process_file(filepath)
        self.print_summary()

    def print_summary(self) -> None:
        """Print the processed / skipped / error counters."""
        print("\n" + "=" * 60)
        print("📊 处理摘要")
        print("=" * 60)
        print(f"✅ 已处理: {self.stats['processed']} 个文件")
        print(f"⚠️ 已跳过: {self.stats['skipped']} 个文件")
        print(f"❌ 错误: {self.stats['errors']} 个文件")
        print("=" * 60 + "\n")
def main(argv=None):
    """Command-line entry point.

    Parses the arguments, prints the effective configuration, then
    processes the given file or directory. Exits with status 1 when the
    path does not exist.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='安全移除代码注释工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 处理单个文件
python remove_comments.py path/to/file.m
# 处理整个目录
python remove_comments.py YuMi/Global/ -r
# 预览模式(不实际修改)
python remove_comments.py YuMi/Global/ -r --dry-run
# 不保留版权信息和重要注释
python remove_comments.py file.m --no-copyright --no-important
# 不创建备份文件
python remove_comments.py file.m --no-backup
"""
    )
    parser.add_argument('path', type=str, help='文件或目录路径')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='递归处理子目录')
    parser.add_argument('--dry-run', action='store_true',
                        help='预览模式(不实际修改文件)')
    parser.add_argument('--no-backup', action='store_true',
                        help='不创建备份文件')
    parser.add_argument('--no-copyright', action='store_true',
                        help='不保留版权声明')
    parser.add_argument('--no-important', action='store_true',
                        help='不保留重要注释(TODO, FIXME等)')
    parser.add_argument('--no-pragma', action='store_true',
                        help='不保留编译指令(#pragma mark等)')
    args = parser.parse_args(argv)

    # Validate the target before building any processing objects.
    path = Path(args.path)
    if not path.exists():
        print(f"❌ 路径不存在: {path}")
        sys.exit(1)

    remover = CommentRemover(
        keep_copyright=not args.no_copyright,
        keep_important=not args.no_important,
        keep_pragma=not args.no_pragma
    )
    processor = FileProcessor(
        remover=remover,
        backup=not args.no_backup,
        dry_run=args.dry_run
    )

    def yes_no(enabled: bool) -> str:
        # Visible marker for each option; both branches were empty strings before.
        return '是' if enabled else '否'

    # Echo the effective configuration.
    print("\n" + "=" * 60)
    print("⚙️ 配置")
    print("=" * 60)
    print(f"路径: {path}")
    print(f"模式: {'预览' if args.dry_run else '实际修改'}")
    print(f"备份: {yes_no(not args.no_backup)}")
    print(f"保留版权: {yes_no(not args.no_copyright)}")
    print(f"保留重要注释: {yes_no(not args.no_important)}")
    print(f"保留编译指令: {yes_no(not args.no_pragma)}")
    print("=" * 60)

    # process_directory prints its own summary; the single-file path must do so here.
    if path.is_file():
        processor.process_file(path)
        processor.print_summary()
    else:
        processor.process_directory(path, recursive=args.recursive)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()