修改地图爬取 urls，应对 Dify 的列表上限和 HTTP 超时
This commit is contained in:
@@ -23,15 +23,17 @@ class CrawlerService:
|
||||
new_task = conn.execute(stmt).fetchone()
|
||||
return {"task_id": new_task[0], "is_new_task": True}
|
||||
|
||||
def add_urls(self, task_id: int, urls: list):
|
||||
def add_urls(self, task_id: int, urls_obj: dict):
|
||||
"""通用 API 实现的批量添加(含详细返回)"""
|
||||
success_urls, skipped_urls, failed_urls = [], [], []
|
||||
|
||||
# 从 urls_obj 中提取 urls 列表
|
||||
urls = urls_obj.get("urls", [])
|
||||
|
||||
with self.db.engine.begin() as conn:
|
||||
for url in urls:
|
||||
clean_url = normalize_url(url)
|
||||
try:
|
||||
# 检查是否存在 (通用写法)
|
||||
# 检查队列中是否已存在该 URL (通用写法)
|
||||
check_q = select(self.db.queue).where(
|
||||
and_(self.db.queue.c.task_id == task_id, self.db.queue.c.url == clean_url)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user