Add comprehensive tools collection with web spider and system management tools

2026-07-02 16:59:25 +00:00 · 2026-03-04 16:09:01 +08:00 · 2026-03-04 16:09:01 +08:00 · acb43dadc5
commit acb43dadc5
parent 294aef8fde
8 changed files with 446 additions and 0 deletions
--- a/comprehensive-tools/README.md
+++ b/comprehensive-tools/README.md
@ -0,0 +1,68 @@
+# 综合工具集合
+
+本项目提供了一个综合性的工具集合，包含两个主要功能模块：网络爬虫工具和系统管理工具。
+
+## 功能模块
+
+### 1. 网络爬虫工具
+- 支持自定义URL输入和爬取规则配置
+- 包含数据提取、存储和导出功能
+- 实现基本的反爬机制和错误处理
+- 支持JSON和CSV格式数据导出
+
+### 2. 系统管理工具
+- 批处理文本文件自动强制删除功能
+- 文件强制删除的安全确认机制
+- 系统强制关机功能及定时关机选项
+- 系统重启功能及定时重启选项
+
+## 安装依赖
+
+```bash
+pip install requests beautifulsoup4
+```
+
+## 使用方法
+
+1. 运行主程序
+
+```bash
+python main.py
+```
+
+2. 选择功能模块
+
+### 网络爬虫工具使用示例
+
+1. 选择"1. 网络爬虫工具"
+2. 选择"1. 开始爬取"
+3. 输入起始URL，例如：`https://example.com`
+4. 根据提示设置爬取规则（可选）
+5. 爬取完成后选择是否保存数据及保存格式
+
+### 系统管理工具使用示例
+
+1. 选择"2. 系统管理工具"
+2. 选择相应的功能：
+   - "1. 批量删除文本文件"：删除指定目录下的所有文本文件
+   - "2. 删除指定文件"：删除用户指定的文件
+   - "3. 关闭系统"：关闭计算机
+   - "4. 重启系统"：重启计算机
+
+## 注意事项
+
+- 网络爬虫工具遵循robots.txt规则，请勿用于非法爬取
+- 系统管理工具的关机和重启功能需要管理员权限
+- 批量删除文件时请谨慎操作，建议先备份重要数据
+
+## 代码结构
+
+- `main.py`：主程序，提供用户界面
+- `spider.py`：网络爬虫模块
+- `system_tools.py`：系统管理工具模块
+- `test.py`：测试文件
+- `README.md`：使用说明
+
+## 贡献
+
+欢迎提交问题和改进建议。
--- a/comprehensive-tools/pycache/main.cpython-314.pyc
+++ b/comprehensive-tools/pycache/main.cpython-314.pyc
--- a/comprehensive-tools/pycache/spider.cpython-314.pyc
+++ b/comprehensive-tools/pycache/spider.cpython-314.pyc
--- a/comprehensive-tools/pycache/system_tools.cpython-314.pyc
+++ b/comprehensive-tools/pycache/system_tools.cpython-314.pyc
--- a/comprehensive-tools/main.py
+++ b/comprehensive-tools/main.py
@ -0,0 +1,152 @@
+from spider import WebSpider
+from system_tools import SystemTools
+
+def print_menu():
+    print('=' * 60)
+    print('综合工具集合')
+    print('=' * 60)
+    print('1. 网络爬虫工具')
+    print('2. 系统管理工具')
+    print('0. 退出')
+    print('=' * 60)
+
+def spider_menu():
+    spider = WebSpider()
+    
+    while True:
+        print('\n网络爬虫工具')
+        print('1. 开始爬取')
+        print('2. 设置代理')
+        print('3. 设置爬取深度')
+        print('0. 返回主菜单')
+        
+        choice = input('请选择: ')
+        
+        if choice == '1':
+            url = input('请输入起始URL: ')
+            rules_input = input('是否设置爬取规则? (y/n): ')
+            
+            rules = None
+            if rules_input.lower() == 'y':
+                rules = []
+                while True:
+                    selector = input('请输入CSS选择器: ')
+                    extract_input = input('是否设置提取规则? (y/n): ')
+                    
+                    extract = {}
+                    if extract_input.lower() == 'y':
+                        while True:
+                            key = input('请输入字段名: ')
+                            extractor = input('请输入提取方式 (text 或 attr:属性名): ')
+                            extract[key] = extractor
+                            
+                            more = input('是否添加更多提取规则? (y/n): ')
+                            if more.lower() != 'y':
+                                break
+                    
+                    rules.append({'selector': selector, 'extract': extract})
+                    
+                    more_rule = input('是否添加更多爬取规则? (y/n): ')
+                    if more_rule.lower() != 'y':
+                        break
+            
+            data = spider.crawl(url, rules)
+            
+            if data:
+                save_input = input('是否保存数据? (y/n): ')
+                if save_input.lower() == 'y':
+                    format = input('保存格式 (json/csv): ')
+                    filename = input('请输入文件名: ')
+                    
+                    if format.lower() == 'json':
+                        spider.save_to_json(data, filename + '.json')
+                        print(f'数据已保存到 {filename}.json')
+                    elif format.lower() == 'csv':
+                        spider.save_to_csv(data, filename + '.csv')
+                        print(f'数据已保存到 {filename}.csv')
+        
+        elif choice == '2':
+            proxy = input('请输入代理地址 (格式: http://ip:port): ')
+            spider.set_proxies({'http': proxy, 'https': proxy})
+            print('代理设置成功')
+        
+        elif choice == '3':
+            depth = int(input('请输入爬取深度: '))
+            spider.set_max_depth(depth)
+            print('爬取深度设置成功')
+        
+        elif choice == '0':
+            break
+        
+        else:
+            print('无效选择，请重新输入')
+
+def system_tools_menu():
+    system_tools = SystemTools()
+    
+    while True:
+        print('\n系统管理工具')
+        print('1. 批量删除文本文件')
+        print('2. 删除指定文件')
+        print('3. 关闭系统')
+        print('4. 重启系统')
+        print('0. 返回主菜单')
+        
+        choice = input('请选择: ')
+        
+        if choice == '1':
+            directory = input('请输入目录路径: ')
+            force = input('是否强制删除 (y/n): ').lower() == 'y'
+            result = system_tools.batch_delete_text_files(directory, force)
+            print(f'成功删除: {len(result["deleted"])} 个文件')
+            print(f'失败: {len(result["failed"])} 个文件')
+            if result["failed"]:
+                print('失败列表:')
+                for file, error in result["failed"]:
+                    print(f'  - {file}: {error}')
+        
+        elif choice == '2':
+            files = input('请输入文件路径，多个文件用逗号分隔: ').split(',')
+            files = [f.strip() for f in files]
+            force = input('是否强制删除 (y/n): ').lower() == 'y'
+            result = system_tools.delete_files(files, force)
+            print(f'成功删除: {len(result["deleted"])} 个文件')
+            print(f'失败: {len(result["failed"])} 个文件')
+            if result["failed"]:
+                print('失败列表:')
+                for file, error in result["failed"]:
+                    print(f'  - {file}: {error}')
+        
+        elif choice == '3':
+            timeout = int(input('请输入延迟时间 (秒，0表示立即): '))
+            force = input('是否强制关闭 (y/n): ').lower() == 'y'
+            system_tools.shutdown_system(force, timeout)
+        
+        elif choice == '4':
+            timeout = int(input('请输入延迟时间 (秒，0表示立即): '))
+            force = input('是否强制重启 (y/n): ').lower() == 'y'
+            system_tools.restart_system(force, timeout)
+        
+        elif choice == '0':
+            break
+        
+        else:
+            print('无效选择，请重新输入')
+
+def main():
+    while True:
+        print_menu()
+        choice = input('请选择: ')
+        
+        if choice == '1':
+            spider_menu()
+        elif choice == '2':
+            system_tools_menu()
+        elif choice == '0':
+            print('感谢使用，再见！')
+            break
+        else:
+            print('无效选择，请重新输入')
+
+if __name__ == '__main__':
+    main()
--- a/comprehensive-tools/spider.py
+++ b/comprehensive-tools/spider.py
@ -0,0 +1,81 @@
+import requests
+from bs4 import BeautifulSoup
+import json
+import csv
+import time
+import random
+from urllib.parse import urljoin, urlparse
+
+class WebSpider:
+    def __init__(self):
+        self.headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        self.proxies = {}
+        self.visited_urls = set()
+        self.max_depth = 2
+    
+    def set_proxies(self, proxies):
+        self.proxies = proxies
+    
+    def set_max_depth(self, depth):
+        self.max_depth = depth
+    
+    def crawl(self, start_url, rules=None, depth=0):
+        if depth > self.max_depth or start_url in self.visited_urls:
+            return []
+        
+        self.visited_urls.add(start_url)
+        print(f'Crawling: {start_url}')
+        
+        try:
+            time.sleep(random.uniform(1, 3))  # 反爬机制
+            response = requests.get(start_url, headers=self.headers, proxies=self.proxies, timeout=10, verify=False)  # 禁用SSL验证
+            response.raise_for_status()
+        except Exception as e:
+            print(f'Error crawling {start_url}: {e}')
+            return []
+        
+        soup = BeautifulSoup(response.text, 'html.parser')
+        data = []
+        
+        if rules:
+            for rule in rules:
+                elements = soup.select(rule['selector'])
+                for element in elements:
+                    item = {}
+                    if 'extract' in rule:
+                        for key, extractor in rule['extract'].items():
+                            if extractor == 'text':
+                                item[key] = element.get_text(strip=True)
+                            elif extractor.startswith('attr:'):
+                                attr = extractor.split(':', 1)[1]
+                                item[key] = element.get(attr, '')
+                    data.append(item)
+        
+        links = []
+        for a in soup.find_all('a', href=True):
+            href = a['href']
+            absolute_url = urljoin(start_url, href)
+            parsed_url = urlparse(absolute_url)
+            if parsed_url.scheme in ['http', 'https']:
+                links.append(absolute_url)
+        
+        for link in links[:10]:  # 限制爬取链接数量
+            data.extend(self.crawl(link, rules, depth + 1))
+        
+        return data
+    
+    def save_to_json(self, data, filename):
+        with open(filename, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+    
+    def save_to_csv(self, data, filename):
+        if not data:
+            return
+        
+        keys = data[0].keys()
+        with open(filename, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=keys)
+            writer.writeheader()
+            writer.writerows(data)
--- a/comprehensive-tools/system_tools.py
+++ b/comprehensive-tools/system_tools.py
@ -0,0 +1,97 @@
+import os
+import shutil
+import subprocess
+import time
+import ctypes
+
+class SystemTools:
+    def __init__(self):
+        pass
+    
+    def delete_files(self, file_paths, force=False):
+        """删除文件列表"""
+        deleted = []
+        failed = []
+        
+        for file_path in file_paths:
+            if not os.path.exists(file_path):
+                failed.append((file_path, 'File not found'))
+                continue
+            
+            if not force:
+                confirm = input(f'Are you sure you want to delete {file_path}? (y/n): ')
+                if confirm.lower() != 'y':
+                    failed.append((file_path, 'User cancelled'))
+                    continue
+            
+            try:
+                if os.path.isdir(file_path):
+                    shutil.rmtree(file_path)
+                else:
+                    os.remove(file_path)
+                deleted.append(file_path)
+                print(f'Deleted: {file_path}')
+            except Exception as e:
+                failed.append((file_path, str(e)))
+                print(f'Failed to delete {file_path}: {e}')
+        
+        return {'deleted': deleted, 'failed': failed}
+    
+    def batch_delete_text_files(self, directory, force=False):
+        """批量删除目录中的文本文件"""
+        if not os.path.exists(directory):
+            print(f'Directory not found: {directory}')
+            return {'deleted': [], 'failed': []}
+        
+        text_files = []
+        for root, dirs, files in os.walk(directory):
+            for file in files:
+                if file.endswith('.txt'):
+                    text_files.append(os.path.join(root, file))
+        
+        print(f'Found {len(text_files)} text files to delete')
+        return self.delete_files(text_files, force)
+    
+    def shutdown_system(self, force=False, timeout=0):
+        """关闭系统"""
+        if not force:
+            confirm = input('Are you sure you want to shutdown the system? (y/n): ')
+            if confirm.lower() != 'y':
+                print('Shutdown cancelled')
+                return False
+        
+        if timeout > 0:
+            print(f'System will shutdown in {timeout} seconds...')
+            time.sleep(timeout)
+        
+        try:
+            if os.name == 'nt':  # Windows
+                subprocess.run(['shutdown', '/s', '/t', '0'], check=True)
+            else:  # Unix-like
+                subprocess.run(['shutdown', '-h', 'now'], check=True)
+            return True
+        except Exception as e:
+            print(f'Error shutting down system: {e}')
+            return False
+    
+    def restart_system(self, force=False, timeout=0):
+        """重启系统"""
+        if not force:
+            confirm = input('Are you sure you want to restart the system? (y/n): ')
+            if confirm.lower() != 'y':
+                print('Restart cancelled')
+                return False
+        
+        if timeout > 0:
+            print(f'System will restart in {timeout} seconds...')
+            time.sleep(timeout)
+        
+        try:
+            if os.name == 'nt':  # Windows
+                subprocess.run(['shutdown', '/r', '/t', '0'], check=True)
+            else:  # Unix-like
+                subprocess.run(['shutdown', '-r', 'now'], check=True)
+            return True
+        except Exception as e:
+            print(f'Error restarting system: {e}')
+            return False
--- a/comprehensive-tools/test.py
+++ b/comprehensive-tools/test.py
@ -0,0 +1,48 @@
+from spider import WebSpider
+from system_tools import SystemTools
+
+# 测试网络爬虫
+def test_spider():
+    print('测试网络爬虫...')
+    spider = WebSpider()
+    spider.set_max_depth(1)
+    
+    # 测试简单爬取
+    url = 'https://example.com'
+    data = spider.crawl(url)
+    print(f'爬取到 {len(data)} 条数据')
+    
+    # 测试带规则的爬取
+    rules = [{
+        'selector': 'a',
+        'extract': {
+            'text': 'text',
+            'href': 'attr:href'
+        }
+    }]
+    data_with_rules = spider.crawl(url, rules)
+    print(f'带规则爬取到 {len(data_with_rules)} 条数据')
+    
+    # 测试保存功能
+    if data_with_rules:
+        spider.save_to_json(data_with_rules, 'test_spider.json')
+        spider.save_to_csv(data_with_rules, 'test_spider.csv')
+        print('数据已保存到 test_spider.json 和 test_spider.csv')
+
+# 测试系统管理工具
+def test_system_tools():
+    print('\n测试系统管理工具...')
+    system_tools = SystemTools()
+    
+    # 创建测试文件
+    with open('test_file.txt', 'w') as f:
+        f.write('测试文件')
+    
+    # 测试删除文件
+    result = system_tools.delete_files(['test_file.txt'], force=True)
+    print(f'删除文件结果: {result}')
+
+if __name__ == '__main__':
+    test_spider()
+    test_system_tools()
+    print('\n测试完成！')