Files
wiki_crawler/scripts/test_apis.py
2025-12-22 22:08:51 +08:00

87 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import requests
import json
import random
# Backend base URL; every endpoint path used below is appended to it.
BASE_URL = "http://127.0.0.1:8000"
def log_res(name, response):
    """Print a readable report for one API call and return its JSON body.

    Args:
        name: Label of the endpoint under test; printed in the report header.
        response: Response object exposing ``status_code``, ``text`` and
            ``json()`` (in this script, the value returned by
            ``requests.post``).

    Returns:
        The decoded JSON body when the status code is 200 and the body
        parses as JSON; ``None`` in every other case.
    """
    print(f"\n=== 测试接口: {name} ===")

    if response.status_code != 200:
        print(f"状态: 失败 (HTTP {response.status_code})")
        print(f"错误信息: {response.text}")
        return None

    try:
        res_json = response.json()
    except ValueError:
        # A 200 reply can still carry a non-JSON body (empty body, proxy
        # error page). JSON decode errors are ValueError subclasses, so
        # report the failure instead of crashing the whole test run.
        print("状态: 失败 (HTTP 200, 响应体不是合法的 JSON)")
        print(f"错误信息: {response.text}")
        return None

    print("状态: 成功 (HTTP 200)")
    print(f"返回数据: {json.dumps(res_json, indent=2, ensure_ascii=False)}")
    return res_json
def run_tests():
    """Exercise the backend API flow end to end against ``BASE_URL``.

    The steps run in order, each one a JSON POST reported via ``log_res``:

    1. ``/register``     - register a randomly named root URL, read ``task_id``.
    2. ``/add_urls``     - queue three sub-URLs, one of them a duplicate.
    3. ``/pending_urls`` - fetch up to two pending URLs for the task.
    4. ``/save_results`` - store two mock chunks for the first pending URL.

    The run stops early when registration fails or no pending URL comes
    back. Failures of steps 2 and 4 are collected and reported at the end
    instead of being silently ignored.

    Returns:
        None. All results are printed to stdout.

    Raises:
        requests.RequestException: If the backend is unreachable or does not
            answer within the timeout; network errors are not swallowed.
    """
    timeout_seconds = 10
    failed_steps = []

    def post(path, payload):
        # Without an explicit timeout requests waits indefinitely, so an
        # unresponsive backend would hang the script.
        return requests.post(
            f"{BASE_URL}{path}", json=payload, timeout=timeout_seconds
        )

    def inner_data(body):
        # Return the nested "data" object of a response body, or an empty
        # dict when the body does not have the expected shape.
        if isinstance(body, dict) and isinstance(body.get("data"), dict):
            return body["data"]
        return {}

    # Test data: a random suffix so repeated runs use a different root URL.
    test_root_url = f"https://example.com/wiki_{random.randint(1000, 9999)}"

    # 1. Test /register
    print("步骤 1: 注册新任务...")
    registered = log_res("注册任务", post("/register", {"url": test_root_url}))
    task_id = inner_data(registered).get("task_id")
    if not isinstance(registered, dict) or registered.get("code") != 1 or task_id is None:
        # Nothing below can run without a task id.
        print("注册任务失败,停止后续测试")
        return

    # 2. Test /add_urls
    print("\n步骤 2: 模拟爬虫发现了新链接,存入队列...")
    sub_urls = [
        f"{test_root_url}/page1",
        f"{test_root_url}/page2",
        f"{test_root_url}/page1",  # deliberate duplicate to exercise backend de-duplication
    ]
    added = log_res("存入新链接", post("/add_urls", {
        "task_id": task_id,
        "urls": sub_urls,
    }))
    if added is None:
        failed_steps.append("/add_urls")

    # 3. Test /pending_urls
    print("\n步骤 3: 模拟爬虫节点获取待处理任务...")
    pending = log_res("获取待处理URL", post("/pending_urls", {
        "task_id": task_id,
        "limit": 2,
    }))
    pending_urls = inner_data(pending).get("urls") or []
    if not pending_urls:
        print("没有获取到待处理URL停止后续测试")
        return
    target_url = pending_urls[0]

    # 4. Test /save_results
    print("\n步骤 4: 模拟爬虫抓取完成,存入知识片段和向量...")
    # Mock 1536-dimensional vector, each component rounded to 8 decimals.
    mock_embedding = [round(random.uniform(-1, 1), 8) for _ in range(1536)]
    payload = {
        "task_id": task_id,
        "results": [
            {
                "source_url": target_url,
                "chunk_index": 0,
                "title": "测试页面标题 - 切片1",
                "content": "这是模拟抓取到的第一段网页内容...",
                "embedding": mock_embedding,
            },
            {
                "source_url": target_url,
                "chunk_index": 1,
                "title": "测试页面标题 - 切片2",
                "content": "这是模拟抓取到的第二段网页内容...",
                "embedding": mock_embedding,
            },
        ],
    }
    saved = log_res("保存结果", post("/save_results", payload))
    if saved is None:
        failed_steps.append("/save_results")

    # Only claim success when every step actually returned a usable reply.
    if failed_steps:
        print(f"\n❌ 以下接口测试失败: {', '.join(failed_steps)}")
    else:
        print("\n✅ 所有 API 流程测试完成!")
# Script entry point: run the API flow only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_tests()