real-e-party-iOS/remove_comments.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
安全移除代码注释工具
支持 Objective-C, Swift, C/C++ 等语言
"""

import re
import os
import sys
import argparse
from pathlib import Path
from typing import List, Tuple, Optional
import shutil

class CommentRemover:
    """Remove comments from C-family source text (Objective-C, Swift, C/C++).

    ``//`` line comments and ``/* ... */`` block comments are stripped while
    the contents of string literals are copied through untouched.  Comments
    that contain a copyright keyword, an "important" keyword (TODO, FIXME,
    ...) or a pragma/MARK marker can be kept, depending on the constructor
    flags.
    """

    def __init__(self, keep_copyright=True, keep_important=True, keep_pragma=True):
        """
        Configure which kinds of comments survive removal.

        Args:
            keep_copyright: keep comments containing a copyright keyword.
            keep_important: keep comments containing TODO, FIXME, NOTE, etc.
            keep_pragma: keep comments containing ``#pragma`` / ``MARK`` markers.
        """
        self.keep_copyright = keep_copyright
        self.keep_important = keep_important
        self.keep_pragma = keep_pragma

        # Keywords marking a comment as important.  They are matched as
        # plain, case-sensitive substrings of the comment text.
        self.important_keywords = [
            'TODO', 'FIXME', 'HACK', 'NOTE', 'WARNING',
            'IMPORTANT', 'BUG', 'CRITICAL', 'MARK'
        ]

        # Keywords marking a comment as a copyright/license notice
        # (also matched as case-sensitive substrings).
        self.copyright_keywords = [
            'Copyright', 'copyright', '©', 'LICENSE',
            'All rights reserved', 'Created by'
        ]

    def is_copyright_comment(self, comment: str) -> bool:
        """Return True if copyright comments are kept and ``comment`` contains a copyright keyword."""
        if not self.keep_copyright:
            return False
        return any(keyword in comment for keyword in self.copyright_keywords)

    def is_important_comment(self, comment: str) -> bool:
        """Return True if important comments are kept and ``comment`` contains an important keyword."""
        if not self.keep_important:
            return False
        return any(keyword in comment for keyword in self.important_keywords)

    def is_pragma_comment(self, comment: str) -> bool:
        """Return True if pragma comments are kept and ``comment`` contains ``#pragma``, ``MARK:`` or ``MARK -``."""
        if not self.keep_pragma:
            return False
        return '#pragma' in comment or 'MARK:' in comment or 'MARK -' in comment

    def should_keep_comment(self, comment: str) -> bool:
        """Return True if ``comment`` matches any of the enabled keep rules."""
        return (self.is_copyright_comment(comment) or
                self.is_important_comment(comment) or
                self.is_pragma_comment(comment))

    @staticmethod
    def _line_has_code(result) -> bool:
        """Return True if the output line currently being built has non-blank content."""
        # Walk back over the emitted pieces until the previous newline piece.
        for piece in reversed(result):
            if piece == '\n':
                return False
            if piece.strip():
                return True
        return False

    @staticmethod
    def _strip_trailing_blanks(result) -> None:
        """Drop space/tab pieces from the end of ``result`` (in place)."""
        while result and result[-1] in (' ', '\t'):
            result.pop()

    @staticmethod
    def _copy_swift_multiline_string(code, start, result) -> int:
        """Copy the Swift ``\"\"\"`` literal starting at ``start`` into ``result``; return the index after it."""
        n = len(code)
        result.append('"""')
        i = start + 3
        while i < n:
            # A backslash escape is copied as a unit so that an escaped
            # quote (e.g. \""") does not terminate the literal.
            if code[i] == '\\' and i + 1 < n:
                result.append(code[i])
                result.append(code[i + 1])
                i += 2
                continue
            if code.startswith('"""', i):
                result.append('"""')
                return i + 3
            result.append(code[i])
            i += 1
        # Unterminated literal: everything up to end of input was copied.
        return i

    def remove_comments(self, code: str, file_extension: str) -> str:
        """
        Return ``code`` with comments removed.

        The text is scanned once, character by character.  String literals
        delimited by ``"`` or ``'`` (and, for ``.swift`` files, ``\"\"\"``
        multi-line literals) are copied verbatim.  A ``//`` directly preceded
        by ``http:`` or ``https:`` is treated as part of a URL, not a comment.
        Comments accepted by :meth:`should_keep_comment` are copied verbatim.

        For a removed comment that follows code on the same line, the blanks
        before it are trimmed.  A removed stand-alone block comment that
        spanned several lines leaves a single newline behind.

        Args:
            code: source text.
            file_extension: file suffix including the dot (e.g. ``'.swift'``);
                only used to enable Swift multi-line string handling.

        Returns:
            The source text without the removed comments.
        """
        result = []
        n = len(code)
        i = 0
        in_string = False
        string_char = None

        while i < n:
            ch = code[i]

            if in_string:
                # Copy an escape sequence as a unit so that an escaped quote
                # does not close the literal.
                if ch == '\\' and i + 1 < n:
                    result.append(ch)
                    result.append(code[i + 1])
                    i += 2
                    continue

                if ch == string_char:
                    in_string = False
                    string_char = None

                result.append(ch)
                i += 1
                continue

            # Swift multi-line string.  This must be tested BEFORE the
            # single-character quote check below; otherwise the first '"' of
            # '"""' is consumed as an ordinary string start and the literal's
            # body is scanned as code.
            if file_extension == '.swift' and code.startswith('"""', i):
                i = self._copy_swift_multiline_string(code, i, result)
                continue

            # Start of an ordinary string / character literal.
            if ch in ('"', "'"):
                string_char = ch
                in_string = True
                result.append(ch)
                i += 1
                continue

            # Line comment: // ... up to (not including) the newline.
            if code.startswith('//', i):
                # "http://" / "https://" outside a string: not a comment.
                if code.endswith(('http://', 'https://'), 0, i + 2):
                    result.append(ch)
                    i += 1
                    continue

                has_code_before = self._line_has_code(result)

                end = code.find('\n', i + 2)
                if end == -1:
                    end = n
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif has_code_before:
                    # Trailing comment: also drop the blanks that preceded it.
                    self._strip_trailing_blanks(result)

                # Keep the line break that ended the comment.
                if i < n:
                    result.append('\n')
                    i += 1
                continue

            # Block comment: /* ... */ (an unterminated one runs to end of input).
            if code.startswith('/*', i):
                close = code.find('*/', i + 2)
                end = n if close == -1 else close + 2
                comment = code[i:end]
                i = end

                if self.should_keep_comment(comment):
                    result.append(comment)
                elif self._line_has_code(result):
                    # Comment after code on the same line: trim the blanks before it.
                    self._strip_trailing_blanks(result)
                elif '\n' in comment:
                    # Stand-alone multi-line comment: leave one newline so the
                    # surrounding code keeps its paragraph break.
                    result.append('\n')
                continue

            # Ordinary character.
            result.append(ch)
            i += 1

        return ''.join(result)

    def clean_empty_lines(self, code: str) -> str:
        """Collapse every run of more than two blank (whitespace-only) lines to two."""
        result = []
        empty_count = 0

        for line in code.split('\n'):
            if line.strip() == '':
                empty_count += 1
                if empty_count <= 2:
                    result.append(line)
            else:
                empty_count = 0
                result.append(line)

        return '\n'.join(result)


class FileProcessor:
    """Apply a comment remover to files on disk, with optional backup and dry-run."""

    # Suffixes (including the dot, compared case-sensitively) of files that are processed.
    SUPPORTED_EXTENSIONS = {'.m', '.h', '.mm', '.swift', '.c', '.cpp', '.cc', '.hpp'}

    def __init__(self, remover: "CommentRemover", backup: bool = True, dry_run: bool = False):
        """
        Create a file processor.

        Args:
            remover: object providing ``remove_comments(code, suffix)`` and
                ``clean_empty_lines(code)``.
            backup: copy each file to ``<name>.backup`` before overwriting it.
            dry_run: only report what would change; never write to disk.
        """
        self.remover = remover
        self.backup = backup
        self.dry_run = dry_run
        # Per-run counters reported by print_summary().
        self.stats = {
            'processed': 0,
            'skipped': 0,
            'errors': 0
        }

    def is_supported_file(self, filepath: Path) -> bool:
        """Return True if the file's suffix is in SUPPORTED_EXTENSIONS."""
        return filepath.suffix in self.SUPPORTED_EXTENSIONS

    def process_file(self, filepath: Path) -> bool:
        """
        Strip comments from a single file.

        The file is read as UTF-8.  Its line-ending style (CRLF vs. LF) is
        detected and restored on write, so removing comments does not also
        rewrite every line ending in the file.

        Args:
            filepath: path of the file to process.

        Returns:
            True if the file was handled (rewritten, previewed, or already
            clean); False if it was skipped as unsupported or an error
            occurred.  Errors are printed and counted, not raised.
        """
        try:
            if not self.is_supported_file(filepath):
                print(f"⚠️  跳过不支持的文件: {filepath}")
                self.stats['skipped'] += 1
                return False

            # newline='' disables newline translation so the file's own
            # line-ending style can be seen.
            with open(filepath, 'r', encoding='utf-8', newline='') as f:
                raw_code = f.read()

            line_ending = '\r\n' if '\r\n' in raw_code else '\n'
            # The remover only understands '\n' line breaks.
            original_code = raw_code.replace('\r\n', '\n').replace('\r', '\n')

            cleaned_code = self.remover.remove_comments(
                original_code,
                filepath.suffix
            )
            cleaned_code = self.remover.clean_empty_lines(cleaned_code)

            # Line statistics for the report.
            original_lines = len(original_code.splitlines())
            cleaned_lines = len(cleaned_code.splitlines())
            removed_lines = original_lines - cleaned_lines

            if original_code == cleaned_code:
                print(f"✓ 无需修改: {filepath}")
                self.stats['skipped'] += 1
                return True

            print(f"📝 {'[预览]' if self.dry_run else '处理'} {filepath}")
            print(f"   删除 {removed_lines} 行注释 ({original_lines} → {cleaned_lines} 行)")

            if self.dry_run:
                print(f"   [预览模式] 不会实际修改文件")
                return True

            # Keep a copy of the untouched original next to the file.
            if self.backup:
                backup_path = filepath.with_suffix(filepath.suffix + '.backup')
                shutil.copy2(filepath, backup_path)
                print(f"   备份: {backup_path}")

            # Write back using the line ending the file had originally.
            with open(filepath, 'w', encoding='utf-8', newline=line_ending) as f:
                f.write(cleaned_code)

            print(f"✅ 完成: {filepath}")
            self.stats['processed'] += 1
            return True

        except Exception as e:
            # Boundary handler: one bad file (unreadable, not UTF-8, ...) must
            # not abort a whole directory run.
            print(f"❌ 错误: {filepath}")
            print(f"   {str(e)}")
            self.stats['errors'] += 1
            return False

    def process_directory(self, directory: Path, recursive: bool = True) -> None:
        """
        Process every supported file in a directory, then print the summary.

        Args:
            directory: directory to scan.
            recursive: also descend into sub-directories.
        """
        pattern = '**/*' if recursive else '*'

        # Sorted so that the processing order (and the log) is deterministic.
        supported_files = sorted(
            f for f in directory.glob(pattern)
            if f.is_file() and self.is_supported_file(f)
        )

        print(f"\n🔍 找到 {len(supported_files)} 个支持的文件\n")

        for filepath in supported_files:
            self.process_file(filepath)

        self.print_summary()

    def print_summary(self) -> None:
        """Print the processed / skipped / error counters."""
        print("\n" + "=" * 60)
        print("📊 处理摘要")
        print("=" * 60)
        print(f"✅ 已处理: {self.stats['processed']} 个文件")
        print(f"⚠️  已跳过: {self.stats['skipped']} 个文件")
        print(f"❌ 错误: {self.stats['errors']} 个文件")
        print("=" * 60 + "\n")


def main():
    """Command-line entry point: parse options, echo the configuration, run the cleanup."""
    usage_examples = """
示例:
  # 处理单个文件
  python remove_comments.py path/to/file.m

  # 处理整个目录
  python remove_comments.py YuMi/Global/ -r

  # 预览模式（不实际修改）
  python remove_comments.py YuMi/Global/ -r --dry-run

  # 不保留版权信息和重要注释
  python remove_comments.py file.m --no-copyright --no-important

  # 不创建备份文件
  python remove_comments.py file.m --no-backup
        """

    parser = argparse.ArgumentParser(
        description='安全移除代码注释工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument('path', type=str, help='文件或目录路径')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='递归处理子目录')
    # The remaining options are all plain on/off switches.
    switches = (
        ('--dry-run', '预览模式（不实际修改文件）'),
        ('--no-backup', '不创建备份文件'),
        ('--no-copyright', '不保留版权声明'),
        ('--no-important', '不保留重要注释（TODO, FIXME等）'),
        ('--no-pragma', '不保留编译指令（#pragma mark等）'),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    options = parser.parse_args()

    # Refuse to continue when the target does not exist.
    target = Path(options.path)
    if not target.exists():
        print(f"❌ 路径不存在: {target}")
        sys.exit(1)

    # The command line exposes "--no-*" switches; invert them once here.
    make_backup = not options.no_backup
    keep_copyright = not options.no_copyright
    keep_important = not options.no_important
    keep_pragma = not options.no_pragma

    remover = CommentRemover(
        keep_copyright=keep_copyright,
        keep_important=keep_important,
        keep_pragma=keep_pragma,
    )
    processor = FileProcessor(
        remover=remover,
        backup=make_backup,
        dry_run=options.dry_run,
    )

    def yes_no(flag):
        return '是' if flag else '否'

    # Echo the effective configuration before touching any file.
    rule = "=" * 60
    print("\n" + rule)
    print("⚙️  配置")
    print(rule)
    print(f"路径: {target}")
    print(f"模式: {'预览' if options.dry_run else '实际修改'}")
    print(f"备份: {yes_no(make_backup)}")
    print(f"保留版权: {yes_no(keep_copyright)}")
    print(f"保留重要注释: {yes_no(keep_important)}")
    print(f"保留编译指令: {yes_no(keep_pragma)}")
    print(rule)

    # A directory run prints its own summary; a single file needs it printed here.
    if not target.is_file():
        processor.process_directory(target, recursive=options.recursive)
        return
    processor.process_file(target)
    processor.print_summary()

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()