完成api最后调试
This commit is contained in:
@@ -36,4 +36,8 @@ async def save_results(req: SaveResultsRequest):
|
|||||||
data = crawler_service.save_results(req.task_id, req.results)
|
data = crawler_service.save_results(req.task_id, req.results)
|
||||||
return make_response(1, "Success", data)
|
return make_response(1, "Success", data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return make_response(0, str(e))
|
return make_response(0, str(e))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
|
||||||
@@ -64,14 +64,7 @@ class CrawlerService:
|
|||||||
).values(status='processing')
|
).values(status='processing')
|
||||||
conn.execute(upd)
|
conn.execute(upd)
|
||||||
return {"urls": urls}
|
return {"urls": urls}
|
||||||
from sqlalchemy import select, insert, update, and_
|
|
||||||
from .database import db_instance
|
|
||||||
from .utils import normalize_url
|
|
||||||
|
|
||||||
class CrawlerService:
|
|
||||||
def __init__(self):
|
|
||||||
self.db = db_instance
|
|
||||||
|
|
||||||
def save_results(self, task_id: int, results: list):
|
def save_results(self, task_id: int, results: list):
|
||||||
"""
|
"""
|
||||||
保存同一 URL 的多个切片。
|
保存同一 URL 的多个切片。
|
||||||
@@ -164,4 +157,5 @@ class CrawlerService:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
crawler_service = CrawlerService()
|
crawler_service = CrawlerService()
|
||||||
@@ -135,5 +135,4 @@ def main(scrape_json: list, DASHSCOPE_API_KEY: str) -> dict:
|
|||||||
'''
|
'''
|
||||||
return {
|
return {
|
||||||
"results": result_list,
|
"results": result_list,
|
||||||
"len_chunks": len(chunks)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,25 @@
|
|||||||
|
def check_status(status_code: float, body: str):
|
||||||
def main(iteration_output: dict, url: str, len_chunks: float, body: str, status_code: float):
|
'''
|
||||||
|
检查状态码和约定的返回值
|
||||||
|
'''
|
||||||
|
if status_code != 200:
|
||||||
|
raise Exception(f"注册任务失败,状态码:{status_code}")
|
||||||
|
if "code" not in body or body["code"] != 1:
|
||||||
|
raise Exception(f"注册任务失败,返回值:{body}")
|
||||||
|
def main(iteration_output: list[dict], status_code: float, body: str):
|
||||||
'''
|
'''
|
||||||
迭代器运行结果记录
|
迭代器运行结果记录
|
||||||
iteration_output: 迭代器运行结果记录
|
iteration_output: 迭代器运行结果记录
|
||||||
url: 待处理的URL
|
|
||||||
len_chunks: 分块后的文本块数量
|
|
||||||
body: 从URL获取的响应体
|
|
||||||
status_code: 从URL获取的响应状态码
|
status_code: 从URL获取的响应状态码
|
||||||
|
body: 从URL获取的响应体
|
||||||
'''
|
'''
|
||||||
if iteration_output is None:
|
try:
|
||||||
iteration_output = {}
|
check_status(status_code, body)
|
||||||
|
data = body["data"]
|
||||||
|
iteration_output.append(data)
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"result": arg1 + arg2,
|
"iteration_output": iteration_output,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,5 +17,5 @@ def main(status_code: float, body: str):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"urls": urls,
|
"urls": urls,
|
||||||
"iteration_output": {} # 迭代器运行结果记录,用于传入给迭代器进行记录运行情况
|
"iteration_output": [] # 迭代器运行结果记录,用于传入给迭代器进行记录运行情况
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user