Merge acb43dadc5 into 294aef8fde

2026-07-03 01:09:25 +00:00 · 2026-03-04 08:15:12 +00:00 · 2026-03-04 08:15:12 +00:00 · 36813c1608
commit 36813c1608
parent 294aef8fde acb43dadc5
8 changed files with 446 additions and 0 deletions
--- a/comprehensive-tools/README.md
+++ b/comprehensive-tools/README.md
@ -0,0 +1,68 @@
 # 综合工具集合
 本项目提供了一个综合性的工具集合，包含两个主要功能模块：网络爬虫工具和系统管理工具。
 ## 功能模块
 ### 1. 网络爬虫工具
 - 支持自定义URL输入和爬取规则配置
 - 包含数据提取、存储和导出功能
 - 通过随机请求间隔和浏览器 User-Agent 请求头降低被反爬机制拦截的概率，并对请求失败进行基本的错误处理
 - 支持JSON和CSV格式数据导出
 ### 2. 系统管理工具
 - 批量删除文本文件（.txt），可选择强制删除（跳过逐个确认）
 - 文件强制删除的安全确认机制
 - 系统强制关机功能及定时关机选项
 - 系统重启功能及定时重启选项
 ## 安装依赖
 ```bash
 pip install requests beautifulsoup4
 ```
 ## 使用方法
 1. 运行主程序
 ```bash
 python main.py
 ```
 2. 选择功能模块
 ### 网络爬虫工具使用示例
 1. 选择"1. 网络爬虫工具"
 2. 选择"1. 开始爬取"
 3. 输入起始URL，例如：`https://example.com`
 4. 根据提示设置爬取规则（可选）
 5. 爬取完成后选择是否保存数据及保存格式
 ### 系统管理工具使用示例
 1. 选择"2. 系统管理工具"
 2. 选择相应的功能：
   - "1. 批量删除文本文件"：递归删除指定目录及其所有子目录下的 .txt 文件
   - "2. 删除指定文件"：删除用户指定的文件
   - "3. 关闭系统"：关闭计算机
   - "4. 重启系统"：重启计算机
 ## 注意事项
 - 网络爬虫工具目前不会自动检查 robots.txt，使用前请自行确认目标站点的 robots.txt 规则和使用条款，请勿用于非法爬取
 - 系统管理工具的关机和重启功能需要管理员权限
 - 批量删除文件时请谨慎操作，建议先备份重要数据
 ## 代码结构
 - `main.py`：主程序，提供用户界面
 - `spider.py`：网络爬虫模块
 - `system_tools.py`：系统管理工具模块
 - `test.py`：测试文件
 - `README.md`：使用说明
 ## 贡献
 欢迎提交问题和改进建议。
--- a/comprehensive-tools/pycache/main.cpython-314.pyc
+++ b/comprehensive-tools/pycache/main.cpython-314.pyc
--- a/comprehensive-tools/pycache/spider.cpython-314.pyc
+++ b/comprehensive-tools/pycache/spider.cpython-314.pyc
--- a/comprehensive-tools/pycache/system_tools.cpython-314.pyc
+++ b/comprehensive-tools/pycache/system_tools.cpython-314.pyc
--- a/comprehensive-tools/main.py
+++ b/comprehensive-tools/main.py
@ -0,0 +1,152 @@
 from spider import WebSpider
 from system_tools import SystemTools
 def print_menu():
    """Print the top-level menu, framed by 60-character '=' rules."""
    divider = '=' * 60
    menu_lines = (
        divider,
        '综合工具集合',
        divider,
        '1. 网络爬虫工具',
        '2. 系统管理工具',
        '0. 退出',
        divider,
    )
    for line in menu_lines:
        print(line)
 def _prompt_crawl_rules():
    """Interactively collect crawl rules from the user.

    Returns:
        A list of ``{'selector': str, 'extract': dict}`` rules, where
        ``extract`` maps a field name to the extraction method typed by the
        user (the prompt asks for ``text`` or ``attr:<name>``).
    """
    rules = []
    while True:
        selector = input('请输入CSS选择器: ')
        extract = {}
        if input('是否设置提取规则? (y/n): ').lower() == 'y':
            while True:
                key = input('请输入字段名: ')
                extract[key] = input('请输入提取方式 (text 或 attr:属性名): ')
                if input('是否添加更多提取规则? (y/n): ').lower() != 'y':
                    break
        rules.append({'selector': selector, 'extract': extract})
        if input('是否添加更多爬取规则? (y/n): ').lower() != 'y':
            break
    return rules


 def _offer_to_save(spider, data):
    """Ask whether to save crawled data and write it as JSON or CSV.

    Args:
        spider: Object providing ``save_to_json`` and ``save_to_csv``.
        data: The crawl result to persist.
    """
    if input('是否保存数据? (y/n): ').lower() != 'y':
        return
    file_format = input('保存格式 (json/csv): ').strip().lower()
    savers = {'json': spider.save_to_json, 'csv': spider.save_to_csv}
    if file_format not in savers:
        # Previously an unknown format was dropped without any feedback.
        print('不支持的保存格式，数据未保存')
        return
    filename = input('请输入文件名: ')
    target = f'{filename}.{file_format}'
    try:
        savers[file_format](data, target)
    except OSError as e:
        # A bad file name or unwritable location must not kill the menu.
        print(f'保存失败: {e}')
        return
    print(f'数据已保存到 {target}')


 def spider_menu():
    """Run the interactive web-spider submenu until the user enters '0'.

    Lets the user start a crawl (optionally with extraction rules and an
    export step), configure a proxy, or change the crawl depth. Invalid
    input is reported and the menu is shown again instead of raising.
    """
    spider = WebSpider()
    while True:
        print('\n网络爬虫工具')
        print('1. 开始爬取')
        print('2. 设置代理')
        print('3. 设置爬取深度')
        print('0. 返回主菜单')
        choice = input('请选择: ')
        if choice == '1':
            url = input('请输入起始URL: ')
            rules = None
            if input('是否设置爬取规则? (y/n): ').lower() == 'y':
                rules = _prompt_crawl_rules()
            data = spider.crawl(url, rules)
            if data:
                _offer_to_save(spider, data)
        elif choice == '2':
            proxy = input('请输入代理地址 (格式: http://ip:port): ')
            spider.set_proxies({'http': proxy, 'https': proxy})
            print('代理设置成功')
        elif choice == '3':
            try:
                depth = int(input('请输入爬取深度: '))
            except ValueError:
                # Non-numeric input used to crash the whole program.
                print('爬取深度必须是整数')
                continue
            if depth < 0:
                print('爬取深度不能为负数')
                continue
            spider.set_max_depth(depth)
            print('爬取深度设置成功')
        elif choice == '0':
            break
        else:
            print('无效选择，请重新输入')
 def _print_delete_result(result):
    """Print the summary of a delete operation.

    Args:
        result: Dict with a ``'deleted'`` list of paths and a ``'failed'``
            list of ``(path, error)`` pairs.
    """
    print(f'成功删除: {len(result["deleted"])} 个文件')
    print(f'失败: {len(result["failed"])} 个文件')
    if result["failed"]:
        print('失败列表:')
        for file, error in result["failed"]:
            print(f'  - {file}: {error}')


 def _read_delay_seconds():
    """Prompt for a delay in seconds.

    Returns:
        The non-negative integer entered by the user, or ``None`` (after
        printing a message) when the input is not a valid delay.
    """
    raw = input('请输入延迟时间 (秒，0表示立即): ')
    try:
        delay = int(raw)
    except ValueError:
        # Non-numeric input used to crash the whole program.
        print('延迟时间必须是整数')
        return None
    if delay < 0:
        print('延迟时间不能为负数')
        return None
    return delay


 def system_tools_menu():
    """Run the interactive system-tools submenu until the user enters '0'.

    Offers batch deletion of text files, deletion of named files, shutdown
    and restart. Invalid numeric input and empty file lists are reported
    and the menu is shown again instead of raising.
    """
    system_tools = SystemTools()
    while True:
        print('\n系统管理工具')
        print('1. 批量删除文本文件')
        print('2. 删除指定文件')
        print('3. 关闭系统')
        print('4. 重启系统')
        print('0. 返回主菜单')
        choice = input('请选择: ')
        if choice == '1':
            directory = input('请输入目录路径: ')
            force = input('是否强制删除 (y/n): ').lower() == 'y'
            _print_delete_result(
                system_tools.batch_delete_text_files(directory, force))
        elif choice == '2':
            raw_paths = input('请输入文件路径，多个文件用逗号分隔: ').split(',')
            # Drop blank entries produced by empty input or stray commas.
            files = [path.strip() for path in raw_paths if path.strip()]
            if not files:
                print('未输入任何文件路径')
                continue
            force = input('是否强制删除 (y/n): ').lower() == 'y'
            _print_delete_result(system_tools.delete_files(files, force))
        elif choice == '3':
            timeout = _read_delay_seconds()
            if timeout is None:
                continue
            force = input('是否强制关闭 (y/n): ').lower() == 'y'
            system_tools.shutdown_system(force, timeout)
        elif choice == '4':
            timeout = _read_delay_seconds()
            if timeout is None:
                continue
            force = input('是否强制重启 (y/n): ').lower() == 'y'
            system_tools.restart_system(force, timeout)
        elif choice == '0':
            break
        else:
            print('无效选择，请重新输入')
 def main():
    """Show the top-level menu in a loop and dispatch to the chosen submenu.

    Returns when the user enters '0'; any unrecognised input prints a hint
    and shows the menu again.
    """
    while True:
        print_menu()
        selection = input('请选择: ')
        if selection == '0':
            print('感谢使用，再见！')
            return
        if selection == '1':
            spider_menu()
            continue
        if selection == '2':
            system_tools_menu()
            continue
        print('无效选择，请重新输入')
 # Start the interactive menu only when run as a script, not when imported.
 if __name__ == '__main__':
    main()
--- a/comprehensive-tools/spider.py
+++ b/comprehensive-tools/spider.py
@ -0,0 +1,81 @@
 import requests
 from bs4 import BeautifulSoup
 import json
 import csv
 import time
 import random
 from urllib.parse import urljoin, urlparse
 class WebSpider:
    """Small recursive web crawler with rule-based extraction and export.

    A crawl fetches a page, applies optional CSS-selector rules to build a
    list of dict items, then follows a limited number of the page's
    http/https links until ``max_depth`` is exceeded.
    """

    # Maximum number of links followed from any single page.
    MAX_LINKS_PER_PAGE = 10

    def __init__(self, verify_ssl=False):
        """Create a spider with default headers, no proxy and depth 2.

        Args:
            verify_ssl: Passed to ``requests.get(verify=...)``. Defaults to
                ``False`` to keep the previous behaviour.
                NOTE(review): disabling certificate verification is
                insecure; pass ``True`` unless a target site requires it.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.proxies = {}
        self.visited_urls = set()
        self.max_depth = 2
        self.verify_ssl = verify_ssl

    def set_proxies(self, proxies):
        """Set the proxies mapping passed to every request."""
        self.proxies = proxies

    def set_max_depth(self, depth):
        """Set the deepest link level (0 = start page only) to crawl."""
        self.max_depth = depth

    def crawl(self, start_url, rules=None, depth=0):
        """Crawl ``start_url`` and the pages it links to.

        Args:
            start_url: URL of the page to fetch.
            rules: Optional list of ``{'selector': css, 'extract': {...}}``
                dicts; ``extract`` maps a field name to ``'text'`` or
                ``'attr:<name>'``. Without rules no items are collected.
            depth: Current recursion depth; callers normally leave it at 0.

        Returns:
            A list of extracted item dicts from this page and every page
            reached from it. Pages that fail to load contribute nothing.
        """
        if depth == 0:
            # Each top-level crawl starts fresh; otherwise a second crawl
            # of an already-seen URL on this instance would return [].
            self.visited_urls = set()
        if depth > self.max_depth or start_url in self.visited_urls:
            return []
        self.visited_urls.add(start_url)
        print(f'Crawling: {start_url}')
        try:
            # Random pause before each request to avoid hammering the site.
            time.sleep(random.uniform(1, 3))
            response = requests.get(
                start_url,
                headers=self.headers,
                proxies=self.proxies,
                timeout=10,
                verify=self.verify_ssl,
            )
            response.raise_for_status()
        except Exception as e:
            # Best effort: report the failure and continue with other pages.
            print(f'Error crawling {start_url}: {e}')
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        data = self._extract_items(soup, rules)
        links = self._collect_links(soup, start_url)
        for link in links[:self.MAX_LINKS_PER_PAGE]:
            data.extend(self.crawl(link, rules, depth + 1))
        return data

    @staticmethod
    def _extract_items(soup, rules):
        """Apply ``rules`` to ``soup``; return one dict per matched element."""
        items = []
        for rule in rules or []:
            for element in soup.select(rule['selector']):
                item = {}
                for key, extractor in rule.get('extract', {}).items():
                    if extractor == 'text':
                        item[key] = element.get_text(strip=True)
                    elif extractor.startswith('attr:'):
                        attr = extractor.split(':', 1)[1]
                        item[key] = element.get(attr, '')
                items.append(item)
        return items

    @staticmethod
    def _collect_links(soup, base_url):
        """Return absolute http/https URLs of all ``<a href>`` tags."""
        links = []
        for a in soup.find_all('a', href=True):
            absolute_url = urljoin(base_url, a['href'])
            if urlparse(absolute_url).scheme in ('http', 'https'):
                links.append(absolute_url)
        return links

    def save_to_json(self, data, filename):
        """Write ``data`` to ``filename`` as indented UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def save_to_csv(self, data, filename):
        """Write a list of dicts to ``filename`` as UTF-8 CSV.

        The header is the union of all keys in first-seen order, so rows
        produced by different rules (with different fields) can be written
        together; missing fields are left empty. Nothing is written when
        ``data`` is empty.
        """
        if not data:
            return
        # dict.fromkeys de-duplicates while preserving first-seen order.
        fieldnames = list(dict.fromkeys(key for row in data for key in row))
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
            writer.writeheader()
            writer.writerows(data)
--- a/comprehensive-tools/system_tools.py
+++ b/comprehensive-tools/system_tools.py
@ -0,0 +1,97 @@
 import os
 import shutil
 import subprocess
 import time
 import ctypes
 class SystemTools:
    """Helpers for deleting files and for shutting down or restarting the OS."""

    def __init__(self):
        pass

    def delete_files(self, file_paths, force=False):
        """Delete each path in ``file_paths``.

        Directories are removed recursively; symbolic links are unlinked
        without touching their target.

        Args:
            file_paths: Iterable of file or directory paths.
            force: When false, ask for confirmation on stdin per path.

        Returns:
            ``{'deleted': [path, ...], 'failed': [(path, reason), ...]}``.
        """
        deleted = []
        failed = []
        for file_path in file_paths:
            # lexists (not exists) so dangling symlinks can still be removed.
            if not os.path.lexists(file_path):
                failed.append((file_path, 'File not found'))
                continue
            if not force:
                confirm = input(f'Are you sure you want to delete {file_path}? (y/n): ')
                if confirm.lower() != 'y':
                    failed.append((file_path, 'User cancelled'))
                    continue
            try:
                # shutil.rmtree refuses symlinks, so a link to a directory
                # must be unlinked like a regular file.
                if os.path.isdir(file_path) and not os.path.islink(file_path):
                    shutil.rmtree(file_path)
                else:
                    os.remove(file_path)
            except Exception as e:
                failed.append((file_path, str(e)))
                print(f'Failed to delete {file_path}: {e}')
            else:
                deleted.append(file_path)
                print(f'Deleted: {file_path}')
        return {'deleted': deleted, 'failed': failed}

    def batch_delete_text_files(self, directory, force=False):
        """Delete every ``*.txt`` file under ``directory``, recursively.

        Args:
            directory: Root directory to search.
            force: Forwarded to ``delete_files``.

        Returns:
            The ``delete_files`` result dict; both lists are empty when
            ``directory`` is not an existing directory.
        """
        if not os.path.isdir(directory):
            print(f'Directory not found: {directory}')
            return {'deleted': [], 'failed': []}
        text_files = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(directory)
            for name in names
            if name.endswith('.txt')
        ]
        print(f'Found {len(text_files)} text files to delete')
        return self.delete_files(text_files, force)

    def _power_action(self, action, gerund, windows_flag, unix_flag,
                      force, timeout):
        """Confirm, optionally wait, then run the OS ``shutdown`` command.

        Args:
            action: Verb used in prompts ('shutdown' or 'restart').
            gerund: Phrase used in the error message ('shutting down').
            windows_flag: Flag for Windows ``shutdown`` ('/s' or '/r').
            unix_flag: Flag for Unix ``shutdown`` ('-h' or '-r').
            force: Skip the confirmation prompt when true.
            timeout: Seconds to sleep before issuing the command.

        Returns:
            True if the command ran successfully, False if it was
            cancelled or failed.
        """
        if not force:
            confirm = input(f'Are you sure you want to {action} the system? (y/n): ')
            if confirm.lower() != 'y':
                print(f'{action.capitalize()} cancelled')
                return False
        if timeout > 0:
            print(f'System will {action} in {timeout} seconds...')
            time.sleep(timeout)
        if os.name == 'nt':  # Windows
            command = ['shutdown', windows_flag, '/t', '0']
        else:  # Unix-like
            command = ['shutdown', unix_flag, 'now']
        try:
            subprocess.run(command, check=True)
        except (OSError, subprocess.SubprocessError) as e:
            print(f'Error {gerund} system: {e}')
            return False
        return True

    def shutdown_system(self, force=False, timeout=0):
        """Shut the machine down; see ``_power_action`` for the arguments."""
        return self._power_action(
            'shutdown', 'shutting down', '/s', '-h', force, timeout)

    def restart_system(self, force=False, timeout=0):
        """Restart the machine; see ``_power_action`` for the arguments."""
        return self._power_action(
            'restart', 'restarting', '/r', '-r', force, timeout)
--- a/comprehensive-tools/test.py
+++ b/comprehensive-tools/test.py
@ -0,0 +1,48 @@
 from spider import WebSpider
 from system_tools import SystemTools
 # Smoke test for the web spider (requires network access).
 def test_spider():
    """Exercise WebSpider: a plain crawl, a rule-based crawl, then export.

    Results are only printed; the exported files ``test_spider.json`` and
    ``test_spider.csv`` are written to the current directory.
    """
    print('测试网络爬虫...')
    url = 'https://example.com'
    spider = WebSpider()
    spider.set_max_depth(1)
    # Plain crawl without extraction rules.
    data = spider.crawl(url)
    print(f'爬取到 {len(data)} 条数据')
    # Rule-based crawl. Use a separate instance so this crawl does not
    # depend on URLs remembered from the first one.
    rules = [{
        'selector': 'a',
        'extract': {
            'text': 'text',
            'href': 'attr:href'
        }
    }]
    rule_spider = WebSpider()
    rule_spider.set_max_depth(1)
    data_with_rules = rule_spider.crawl(url, rules)
    print(f'带规则爬取到 {len(data_with_rules)} 条数据')
    # Export only when something was extracted.
    if data_with_rules:
        rule_spider.save_to_json(data_with_rules, 'test_spider.json')
        rule_spider.save_to_csv(data_with_rules, 'test_spider.csv')
        print('数据已保存到 test_spider.json 和 test_spider.csv')
 # Smoke test for the system tools (file deletion only).
 def test_system_tools():
    """Create a scratch file and check that ``delete_files`` removes it.

    Shutdown and restart are deliberately not exercised.
    """
    print('\n测试系统管理工具...')
    system_tools = SystemTools()
    # Explicit encoding: the content is non-ASCII and the platform default
    # encoding may not be able to represent it.
    with open('test_file.txt', 'w', encoding='utf-8') as f:
        f.write('测试文件')
    # force=True skips the interactive confirmation prompt.
    result = system_tools.delete_files(['test_file.txt'], force=True)
    print(f'删除文件结果: {result}')
    assert result == {'deleted': ['test_file.txt'], 'failed': []}, result
 # Script entry point: run both smoke tests in order when executed directly.
 if __name__ == '__main__':
    test_spider()
    test_system_tools()
    print('\n测试完成！')