新增业务原子化;新增firecrawl的baseurl修改

This commit is contained in:
2026-01-21 14:55:49 +08:00
parent 155974572c
commit 96eefd9bb7
3 changed files with 112 additions and 36 deletions

View File

@@ -93,7 +93,62 @@ class DataService:
except:
return url
# ... (保持 get_task_monitor_data, save_chunks, search 等方法不变) ...
def get_task_by_root_url(self, url: str):
    """Look up the task registered for a root URL.

    The URL is passed through ``normalize_url`` first, so it is compared
    against ``tasks.root_url`` in its normalized form.

    Returns:
        The ``id`` of the first matching task row, or ``None`` when no row
        matches.
    """
    normalized = normalize_url(url)
    tasks = self.db.tasks
    lookup = select(tasks.c.id).where(tasks.c.root_url == normalized)
    with self.db.engine.connect() as conn:
        match = conn.execute(lookup).fetchone()
    if not match:
        return None
    return match[0]
def create_task_with_urls(self, url: str, urls: list[str]):
    """Create a task (if needed) and enqueue its URLs in one transaction.

    The root URL and every entry of ``urls`` are normalized with
    ``normalize_url``. If a task with the normalized root URL already exists
    it is reused; otherwise a new task row is inserted. Every URL that is not
    yet queued for that task is then inserted with status ``'pending'``.

    Queue inserts are best-effort: each one runs inside its own SAVEPOINT, so
    a failing row is rolled back and logged without invalidating the
    surrounding transaction. Without the savepoint, a single failed statement
    leaves some databases (e.g. PostgreSQL) unable to run the remaining
    statements or to commit the task row, while this method would still
    report success.

    Args:
        url: Root URL identifying the task.
        urls: URLs to enqueue under the task; duplicates are inserted once.

    Returns:
        ``{"task_id": int, "is_new_task": bool, "added": int}`` where
        ``added`` counts the queue rows inserted by this call.
    """
    clean_root = normalize_url(url)
    # dict.fromkeys drops repeated URLs while keeping first-seen order, so a
    # duplicate in the input does not cost extra round trips.
    clean_urls = list(dict.fromkeys(normalize_url(u) for u in urls))
    tasks = self.db.tasks
    queue = self.db.queue
    added_count = 0

    with self.db.engine.begin() as conn:
        # 1. Reuse the existing task for this root URL, or create one.
        existing = conn.execute(
            select(tasks.c.id).where(tasks.c.root_url == clean_root)
        ).fetchone()
        if existing:
            task_id = existing[0]
            is_new = False
        else:
            stmt = insert(tasks).values(root_url=clean_root).returning(tasks.c.id)
            task_id = conn.execute(stmt).fetchone()[0]
            is_new = True

        # 2. Insert the URLs that are not queued yet, one at a time.
        for u in clean_urls:
            exists_q = select(queue.c.id).where(
                and_(queue.c.task_id == task_id, queue.c.url == u)
            )
            try:
                # SAVEPOINT: a failure here undoes only this URL's work.
                # NOTE(review): relies on the driver supporting SAVEPOINT.
                with conn.begin_nested():
                    if conn.execute(exists_q).fetchone():
                        inserted = False
                    else:
                        conn.execute(
                            insert(queue).values(task_id=task_id, url=u, status='pending')
                        )
                        inserted = True
            except Exception as e:
                # Deliberate best-effort: skip this URL, keep processing.
                logger.warning(f"Failed to queue URL {u} for task {task_id}: {e}")
                continue
            if inserted:
                added_count += 1

    return {"task_id": task_id, "is_new_task": is_new, "added": added_count}
def delete_task(self, task_id: int):
    """Delete a task together with its queue rows and chunks.

    Use with care; intended mainly for rolling back a task. The three DELETE
    statements run in a single transaction: if any of them (or the commit)
    fails, the transaction is rolled back, the error is logged, and nothing
    is removed.

    Args:
        task_id: ``tasks.id`` of the task to delete.

    Returns:
        True when all deletes were committed, False when an error occurred.
    """
    params = {"tid": task_id}
    try:
        # The try wraps the whole transaction block so that an exception
        # reaches engine.begin()'s context manager and triggers a rollback.
        # Catching it inside the block would let the block exit normally and
        # commit a partial delete.
        with self.db.engine.begin() as conn:
            # Rows that reference the task go first, the task row last.
            conn.execute(text("DELETE FROM chunks WHERE task_id = :tid"), params)
            conn.execute(text("DELETE FROM queue WHERE task_id = :tid"), params)
            conn.execute(text("DELETE FROM tasks WHERE id = :tid"), params)
    except Exception as e:
        logger.error(f"Failed to delete task {task_id}: {e}")
        return False
    return True
def get_task_monitor_data(self, task_id: int):
"""[数据库层监控] 获取持久化的任务状态"""