新增业务原子化;新增 firecrawl 的 baseurl 修改
This commit is contained in:
@@ -93,7 +93,62 @@ class DataService:
|
||||
except:
|
||||
return url
|
||||
|
||||
# ... (保持 get_task_monitor_data, save_chunks, search 等方法不变) ...
|
||||
def get_task_by_root_url(self, url: str):
    """Return the id of the task whose root URL matches ``url``, or None.

    ``url`` is passed through ``normalize_url`` before it is compared with
    the ``root_url`` column of the tasks table.
    """
    lookup = select(self.db.tasks.c.id).where(
        self.db.tasks.c.root_url == normalize_url(url)
    )
    with self.db.engine.connect() as connection:
        match = connection.execute(lookup).fetchone()
    if not match:
        return None
    return match[0]
|
||||
|
||||
def create_task_with_urls(self, url: str, urls: list[str]):
    """Create (or reuse) the task for ``url`` and enqueue ``urls`` under it.

    Everything runs inside a single transaction. If a task whose
    ``root_url`` equals the normalized ``url`` already exists it is reused;
    otherwise a new task row is inserted. Each normalized URL that is not
    yet queued for that task is then inserted with status ``'pending'``.

    A URL whose check/insert fails is logged and skipped. Each URL is
    processed inside its own SAVEPOINT so that a failure is rolled back on
    its own: on databases such as PostgreSQL a failed statement otherwise
    aborts the whole outer transaction, which would make every later insert
    fail silently and discard the task creation at commit time while this
    method still reported success.

    Args:
        url: Root URL identifying the task.
        urls: URLs to enqueue; duplicates (after normalization) are
            collapsed before touching the database.

    Returns:
        ``{"task_id": ..., "is_new_task": bool, "added": int}`` where
        ``added`` is the number of queue rows inserted by this call.
    """
    clean_root = normalize_url(url)
    # dict.fromkeys drops duplicates while keeping first-seen order, so each
    # distinct URL costs at most one existence check and one insert.
    clean_urls = list(dict.fromkeys(normalize_url(u) for u in urls))
    added_count = 0

    with self.db.engine.begin() as conn:
        # 1. Look for an existing task with the same root URL.
        existing = conn.execute(
            select(self.db.tasks.c.id).where(self.db.tasks.c.root_url == clean_root)
        ).fetchone()
        if existing:
            task_id = existing[0]
            is_new = False
        else:
            # Create the task and read back its generated id.
            stmt = (
                insert(self.db.tasks)
                .values(root_url=clean_root)
                .returning(self.db.tasks.c.id)
            )
            task_id = conn.execute(stmt).fetchone()[0]
            is_new = True

        # 2. Insert the URLs one by one, skipping those already queued.
        for u in clean_urls:
            exists_q = select(self.db.queue.c.id).where(
                and_(self.db.queue.c.task_id == task_id, self.db.queue.c.url == u)
            )
            try:
                # SAVEPOINT: a failure here only undoes this URL's work and
                # leaves the outer transaction usable for the remaining URLs.
                with conn.begin_nested():
                    if conn.execute(exists_q).fetchone():
                        continue
                    conn.execute(
                        insert(self.db.queue).values(task_id=task_id, url=u, status='pending')
                    )
            except Exception as exc:
                # Best effort: keep going, but leave a trace of what was skipped.
                logger.warning("Skipping URL %s for task %s: %s", u, task_id, exc)
                continue
            added_count += 1

    return {"task_id": task_id, "is_new_task": is_new, "added": added_count}
|
||||
|
||||
def delete_task(self, task_id: int):
    """Delete a task together with its chunks and queue rows.

    Use with care; this is mainly intended for rolling back a task. The
    three deletes run in one transaction. The ``try`` wraps the whole
    transaction block so that any failure propagates out of the context
    manager first and rolls the transaction back; catching the error inside
    the block would let the context manager commit a partial delete.

    Args:
        task_id: Id of the task to remove.

    Returns:
        True if all deletes were committed; False if anything failed
        (including connecting or committing), in which case the error is
        logged.
    """
    params = {"tid": task_id}
    try:
        with self.db.engine.begin() as conn:
            # Child rows first, then the task row itself.
            conn.execute(text("DELETE FROM chunks WHERE task_id = :tid"), params)
            conn.execute(text("DELETE FROM queue WHERE task_id = :tid"), params)
            conn.execute(text("DELETE FROM tasks WHERE id = :tid"), params)
    except Exception as e:
        logger.error(f"Failed to delete task {task_id}: {e}")
        return False
    return True
|
||||
|
||||
def get_task_monitor_data(self, task_id: int):
|
||||
"""[数据库层监控] 获取持久化的任务状态"""
|
||||
|
||||
Reference in New Issue
Block a user