Add an endpoint for fetching all knowledge bases; move the API version back to v1

2026-01-20 02:47:03 +08:00
parent 860ada3334
commit 155974572c
10 changed files with 130 additions and 184 deletions
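The commit message refers to a v1 endpoint exposing the new knowledge-base list, but the hunks below only cover the service and data layers. As a minimal sketch, the route could look like the following; the framework (FastAPI), the route path, the module path, and the crawler_service singleton are all assumptions, only get_knowledge_base_list() itself appears in this diff:

    from fastapi import APIRouter

    # Assumed module path and singleton; adjust to the project's actual wiring.
    from app.services.crawler_service import crawler_service

    router = APIRouter(prefix="/api/v1")

    @router.get("/knowledge-bases")
    def list_knowledge_bases():
        # Delegates to CrawlerService, which forwards to DataService.get_all_tasks().
        return crawler_service.get_knowledge_base_list()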


@@ -30,6 +30,11 @@ class CrawlerService:
        self._active_workers: Dict[int, set] = {}
        self._lock = threading.Lock()

    def get_knowledge_base_list(self):
        """Return the knowledge base list."""
        return data_service.get_all_tasks()

    def _track_start(self, task_id: int, url: str):
        """[Internal] Mark that a given URL has started processing."""
        with self._lock:
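For reference, get_knowledge_base_list() simply forwards the result of DataService.get_all_tasks() (second hunk below), so its return value is a plain list of dicts of the following shape (values illustrative only):

    [
        {"task_id": 1, "root_url": "https://docs.firecrawl.dev", "name": "firecrawl"},
        {"task_id": 2, "root_url": "https://docs.python.org", "name": "python"},
    ]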


@@ -63,6 +63,38 @@ class DataService:
            and_(self.db.queue.c.task_id == task_id, self.db.queue.c.url == clean_url)
        ).values(status=status))

    def get_all_tasks(self):
        """
        [New] Return all registered tasks (the knowledge base list).
        Used for frontend display and for workflow routing.
        """
        with self.db.engine.connect() as conn:
            # Select id and root_url (plus created_at, if available).
            # Assumes the tasks table has id and root_url columns.
            stmt = select(self.db.tasks.c.id, self.db.tasks.c.root_url).order_by(self.db.tasks.c.id)
            rows = conn.execute(stmt).fetchall()
            # Return a compact list.
            return [
                {"task_id": r[0], "root_url": r[1], "name": self._extract_name(r[1])}
                for r in rows
            ]

    def _extract_name(self, url: str) -> str:
        """Helper: derive a short alias from a URL."""
        try:
            from urllib.parse import urlparse
            domain = urlparse(url).netloc
            # e.g. docs.firecrawl.dev -> firecrawl
            parts = domain.split('.')
            if len(parts) >= 2:
                return parts[-2]
            return domain
        except Exception:
            return url

    # ... (get_task_monitor_data, save_chunks, search, etc. remain unchanged) ...

    def get_task_monitor_data(self, task_id: int):
        """[DB-level monitoring] Return the persisted task status."""
        with self.db.engine.connect() as conn:
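The get_all_tasks() docstring mentions workflow routing; under that assumption, a caller could resolve an alias back to a task_id roughly as sketched below. The helper name and its fallback behaviour are hypothetical and not part of this commit:

    from typing import Optional

    def resolve_task_id(name: str, data_service) -> Optional[int]:
        # Hypothetical helper: return the task whose derived alias matches `name`.
        for task in data_service.get_all_tasks():
            if task["name"] == name:
                return task["task_id"]
        return None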