mirror of
https://github.com/codecrafters-io/build-your-own-x
synced 2026-07-02 16:59:25 +00:00
48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
from spider import WebSpider
|
|
from system_tools import SystemTools
|
|
|
|
# 测试网络爬虫
|
|
def test_spider():
    """Smoke-test WebSpider: a plain crawl, a rule-based crawl, then export.

    Creates a WebSpider, calls ``set_max_depth(1)``, and crawls
    ``https://example.com`` twice: once without rules and once with a single
    rule whose selector is ``'a'``. If the rule-based crawl returns a truthy
    result, it is passed to ``save_to_json`` and ``save_to_csv``. Progress
    messages are printed to stdout; nothing is returned.
    """
    print('测试网络爬虫...')
    crawler = WebSpider()
    crawler.set_max_depth(1)

    # Plain crawl: no extraction rules supplied.
    target = 'https://example.com'
    plain_results = crawler.crawl(target)
    print(f'爬取到 {len(plain_results)} 条数据')

    # Rule-based crawl. The rule schema belongs to WebSpider; presumably this
    # pulls the text and href of each matched element -- confirm against
    # WebSpider.crawl.
    link_fields = {
        'text': 'text',
        'href': 'attr:href',
    }
    link_rule = {
        'selector': 'a',
        'extract': link_fields,
    }
    ruled_results = crawler.crawl(target, [link_rule])
    print(f'带规则爬取到 {len(ruled_results)} 条数据')

    # Nothing to export when the rule-based crawl came back empty.
    if not ruled_results:
        return

    crawler.save_to_json(ruled_results, 'test_spider.json')
    crawler.save_to_csv(ruled_results, 'test_spider.csv')
    print('数据已保存到 test_spider.json 和 test_spider.csv')
|
|
|
|
# 测试系统管理工具
|
|
def test_system_tools():
    """Smoke-test SystemTools.delete_files on a freshly created scratch file.

    Writes ``test_file.txt`` in the current working directory, then asks a
    SystemTools instance to delete it with ``force=True`` and prints whatever
    ``delete_files`` returns. Nothing is returned.
    """
    print('\n测试系统管理工具...')
    system_tools = SystemTools()

    # Create the scratch file. The payload is non-ASCII, so the encoding is
    # given explicitly: with the platform default (e.g. cp1252 on Windows)
    # the write can fail with UnicodeEncodeError.
    with open('test_file.txt', 'w', encoding='utf-8') as f:
        f.write('测试文件')

    # Delete the file that was just created and report the outcome.
    result = system_tools.delete_files(['test_file.txt'], force=True)
    print(f'删除文件结果: {result}')
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run each smoke test in order, then report completion.
    for run_check in (test_spider, test_system_tools):
        run_check()
    print('\n测试完成!')
|