diff --git a/nodes/chunk_and_embedding.py b/nodes/chunk_and_embedding.py
index f55ab1e..28372c9 100644
--- a/nodes/chunk_and_embedding.py
+++ b/nodes/chunk_and_embedding.py
@@ -119,7 +119,21 @@ def main(scrape_json: list, DASHSCOPE_API_KEY: str) -> dict:
             "chunk_index": idx,
             "embedding": vectors[idx]
         })
-
+    '''
+    JSON format
+    results:
+    [
+        {
+            "source_url": "https://example.com",
+            "title": "Example Title",
+            "content": "Example chunk content",
+            "chunk_index": 0,
+            "embedding": [0.123, 0.456, ...]
+        },
+        ...
+    ]
+    '''
     return {
-        "results": result_list
+        "results": result_list,
+        "len_chunks": len(chunks)
     }
diff --git a/nodes/iteration_output.py b/nodes/iteration_output.py
new file mode 100644
index 0000000..3b41f10
--- /dev/null
+++ b/nodes/iteration_output.py
@@ -0,0 +1,27 @@
+
+def main(iteration_output: dict, url: str, len_chunks: float, body: str, status_code: float):
+    '''
+    Record the outcome of one iteration (one processed URL).
+
+    iteration_output: run records accumulated so far; None or empty starts a new record
+    url: the URL that was processed
+    len_chunks: number of text chunks produced after chunking
+    body: response body obtained for the URL
+    status_code: response status code obtained for the URL
+
+    Returns {"result": records}, where records is a copy of
+    iteration_output with an entry for url added.
+    '''
+    # NOTE(review): the original body returned the undefined names
+    # arg1 + arg2 (NameError on every call). The per-URL record layout
+    # below is an assumption -- confirm it against the downstream consumer.
+    records = dict(iteration_output) if iteration_output else {}
+    records[url] = {
+        "len_chunks": len_chunks,
+        "body": body,
+        "status_code": status_code,
+    }
+    return {
+        "result": records,
+    }
diff --git a/nodes/parse_pending_urls.py b/nodes/parse_pending_urls.py
index bcd6357..b830ce4 100644
--- a/nodes/parse_pending_urls.py
+++ b/nodes/parse_pending_urls.py
@@ -16,5 +16,6 @@ def main(status_code: float, body: str):
     urls = body["data"]["urls"]
 
     return {
-        "urls": urls
+        "urls": urls,
+        "iteration_output": {}  # empty run record, passed into the iterator so it can log how each run went
     }