Files
wiki_crawler/scripts/test_apis.py

96 lines
3.1 KiB
Python
Raw Normal View History

2025-12-22 22:08:51 +08:00
import requests
import json
import random
# 配置后端地址
2025-12-28 10:33:47 +08:00
BASE_URL = "http://47.122.127.178"
2025-12-22 22:08:51 +08:00
def log_res(name, response):
print(f"\n=== 测试接口: {name} ===")
if response.status_code == 200:
res_json = response.json()
print(f"状态: 成功 (HTTP 200)")
print(f"返回数据: {json.dumps(res_json, indent=2, ensure_ascii=False)}")
return res_json
else:
print(f"状态: 失败 (HTTP {response.status_code})")
print(f"错误信息: {response.text}")
return None
def run_tests():
# 测试数据准备
test_root_url = f"https://example.com/wiki_{random.randint(1000, 9999)}"
# 1. 测试 /register
print("步骤 1: 注册新任务...")
res = requests.post(f"{BASE_URL}/register", json={"url": test_root_url})
data = log_res("注册任务", res)
if not data or data['code'] != 1: return
task_id = data['data']['task_id']
# 2. 测试 /add_urls
print("\n步骤 2: 模拟爬虫发现了新链接,存入队列...")
sub_urls = [
f"{test_root_url}/page1",
f"{test_root_url}/page2",
f"{test_root_url}/page1" # 故意重复一个,测试后端去重
]
res = requests.post(f"{BASE_URL}/add_urls", json={
"task_id": task_id,
"urls": sub_urls
})
log_res("存入新链接", res)
# 3. 测试 /pending_urls
print("\n步骤 3: 模拟爬虫节点获取待处理任务...")
res = requests.post(f"{BASE_URL}/pending_urls", json={
"task_id": task_id,
"limit": 2
})
data = log_res("获取待处理URL", res)
if not data or not data['data']['urls']:
print("没有获取到待处理URL停止后续测试")
return
target_url = data['data']['urls'][0]
# 4. 测试 /save_results
print("\n步骤 4: 模拟爬虫抓取完成,存入知识片段和向量...")
# 模拟一个 1536 维的向量(已处理精度)
mock_embedding = [round(random.uniform(-1, 1), 8) for _ in range(1536)]
payload = {
"task_id": task_id,
"results": [
{
"source_url": target_url,
"chunk_index": 0,
"title": "测试页面标题 - 切片1",
"content": "这是模拟抓取到的第一段网页内容...",
"embedding": mock_embedding
},
{
"source_url": target_url,
"chunk_index": 1,
"title": "测试页面标题 - 切片2",
"content": "这是模拟抓取到的第二段网页内容...",
"embedding": mock_embedding
}
]
}
res = requests.post(f"{BASE_URL}/save_results", json=payload)
log_res("保存结果", res)
2025-12-28 10:33:47 +08:00
# 5. 测试 /search
print("\n步骤 5: 测试基于向量的搜索...")
query = [round(random.uniform(-1, 1), 8) for _ in range(1536)]
res = requests.post(f"{BASE_URL}/search", json={
"task_id": None,
"query_embedding": query,
"limit": 5
})
log_res("基于向量的搜索", res)
2025-12-22 22:08:51 +08:00
print("\n✅ 所有 API 流程测试完成!")
if __name__ == "__main__":
run_tests()