Files
wiki_crawler/nodes/parse_pending_urls.py

30 lines
763 B
Python
Raw Permalink Normal View History

2025-12-29 14:42:33 +08:00
import json
def parse_response(status_code: float, body: str):
    """Validate a task-registration response and return its payload.

    Checks that the HTTP status code is 200 and that the JSON body carries
    the agreed success marker (``"code" == 1``), then returns the body's
    ``"data"`` field.

    Args:
        status_code: HTTP status code of the response; 200 means success.
        body: Raw JSON response body.

    Returns:
        The ``"data"`` field of the decoded JSON body.

    Raises:
        Exception: if the status code is not 200, or the decoded body is not
            a dict carrying ``"code" == 1``.
        json.JSONDecodeError: if ``body`` is not valid JSON.
    """
    if status_code != 200:
        raise Exception(f"注册任务失败,状态码:{status_code}")
    data = json.loads(body)
    # Single .get lookup replaces the original's redundant `"code" not in data
    # or data["code"] != 1`; the isinstance guard keeps non-dict JSON bodies
    # (e.g. a top-level list) on the same contract-error path as before.
    if not isinstance(data, dict) or data.get("code") != 1:
        raise Exception(f"注册任务失败,返回值:{body}")
    return data["data"]
2025-12-22 22:08:51 +08:00
def main(status_code: float, body: str):
    """Node entry point: turn a task-registration response into crawler state.

    Args:
        status_code: HTTP status code of the response.
        body: Raw JSON response body.

    Returns:
        A dict with the pending ``"urls"`` taken from the response payload,
        plus an empty ``"iteration_output"`` list that downstream iterator
        nodes use to record their run results.

    Raises:
        Exception: propagated from ``parse_response`` when the response does
            not match the agreed contract.
    """
    # The original wrapped this call in `try/except Exception as e: raise e`,
    # which only re-raises the same exception while obscuring the traceback;
    # letting it propagate directly is equivalent and cleaner.
    data = parse_response(status_code, body)
    return {
        "urls": data["urls"],
        # Iterator run log: passed to the iterator node so it can record
        # per-iteration results.
        "iteration_output": [],
    }