Files
wiki_crawler/nodes/parse_search.py
2025-12-23 01:43:53 +08:00

50 lines
1.4 KiB
Python

def check_status(status_code: int, body: dict) -> None:
    """Validate the HTTP status code and the agreed-upon response payload.

    Args:
        status_code: HTTP status code of the response; must be 200.
        body: Parsed response payload (a mapping); must contain ``"code" == 1``
            per the API contract.  (Was mis-annotated as ``str`` — the body is
            indexed as a dict below.)

    Raises:
        Exception: If the status code is not 200, or the payload is missing
            the ``"code"`` key / its value is not 1.
    """
    if status_code != 200:
        raise Exception(f"注册任务失败,状态码:{status_code}")
    if "code" not in body or body["code"] != 1:
        raise Exception(f"注册任务失败,返回值:{body}")
def format_rag_context(data: list) -> str:
    """Format a list of retrieved RAG items into a Markdown string.

    Each item is expected to be a dict with the fields ``title``,
    ``source_url``, ``content`` and ``chunk_index`` (all optional —
    sensible placeholders are substituted when missing).

    Args:
        data: Retrieved items from the search API / database.

    Returns:
        A Markdown document with one block per item, separated by
        ``---`` horizontal rules, or a "not found" message when
        ``data`` is empty.
    """
    if not data:
        return "未找到相关的参考资料。"
    formatted_parts = []
    for i, item in enumerate(data):
        # Extract fields (matching the database / API response schema).
        title = item.get("title") or "无标题"
        url = item.get("source_url") or "未知来源"
        # `or ""` guards against an explicit None value, consistent with
        # the title/url handling above (a plain .get default would still
        # crash on None at .strip()).
        content = (item.get("content") or "").strip()
        c_idx = item.get("chunk_index", 0)
        # Build the Markdown block for this item.
        block = (
            f"### [资料 {i+1}] {title}\n"
            f"**来源**: {url}\n"
            f"**切片索引**: {c_idx}\n"
            f"**内容**: {content}"
        )
        formatted_parts.append(block)
    # Join the per-item blocks with a horizontal-rule separator.
    return "\n\n---\n\n".join(formatted_parts)
def main(status_code: int, body: dict) -> dict:
    """Validate the search response and format its data for the RAG prompt.

    Args:
        status_code: HTTP status code of the search response.
        body: Parsed response payload; must pass ``check_status`` and
            contain a ``"data"`` list of retrieved items.  (Was
            mis-annotated as ``str``.)

    Returns:
        A dict with key ``"RAG_results"`` holding the Markdown-formatted
        context string.

    Raises:
        Exception: Propagated from ``check_status`` on a bad response.
        KeyError: If the payload has no ``"data"`` key.
    """
    # A bare `try: ... except Exception as e: raise e` wrapper added
    # nothing (and trimmed nothing) — let the exception propagate as-is.
    check_status(status_code, body)
    data = body["data"]
    rag_context = format_rag_context(data)
    return {
        "RAG_results": rag_context
    }