from fastapi import APIRouter, BackgroundTasks, status

from backend.services.crawler_service import crawler_service
from backend.services.data_service import data_service
from backend.schemas.v3 import (
    TaskCreateRequest, TaskExecuteRequest, SearchRequest,
    ResponseBase, TaskStatusData
)

router = APIRouter(prefix="/api/v3", tags=["V3 Knowledge Base"])


@router.post("/tasks", status_code=status.HTTP_201_CREATED, response_model=ResponseBase)
async def create_task(req: TaskCreateRequest):
    """Create a new task (Map)."""
    try:
        res = crawler_service.map_site(req.url)
        return ResponseBase(code=1, msg="Task Created", data=res)
    except Exception as e:
        return ResponseBase(code=0, msg=f"Map Failed: {str(e)}")


@router.get("/tasks/{task_id}", response_model=ResponseBase)
async def get_task_status(task_id: int):
    """
    Real-time monitoring: returns the state persisted in the database
    plus the threads currently running in memory.
    """
    # Delegate to crawler_service's aggregation method
    data = crawler_service.get_task_status(task_id)
    if not data:
        return ResponseBase(code=0, msg="Task not found")
    return ResponseBase(code=1, msg="Success", data=data)


@router.post("/tasks/{task_id}/run", status_code=status.HTTP_202_ACCEPTED, response_model=ResponseBase)
async def run_task(task_id: int, req: TaskExecuteRequest, bg_tasks: BackgroundTasks):
    """Trigger a multi-threaded crawl in the background."""
    # Simple existence check: looking up the task's monitoring record in the database is enough
    if not data_service.get_task_monitor_data(task_id):
        return ResponseBase(code=0, msg="Task not found")

    # Hand off to a background task
    bg_tasks.add_task(crawler_service.process_queue_concurrent, task_id, req.batch_size)

    return ResponseBase(
        code=1,
        msg="Background Execution Started",
        data={"task_id": task_id, "mode": "concurrent_thread_pool"}
    )


@router.post("/search", response_model=ResponseBase)
async def search_knowledge(req: SearchRequest):
    """Hybrid retrieval + rerank."""
    try:
        res = crawler_service.search(req.query, req.task_id, req.return_num)
        return ResponseBase(code=1, msg="Search Completed", data=res)
    except Exception as e:
        return ResponseBase(code=0, msg=f"Search Failed: {str(e)}")
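
# --- Usage sketch (illustrative only, not part of the router) ---
# Assumes the app is served at http://localhost:8000; the request bodies below
# reflect the fields this module reads from the V3 schemas (url, batch_size,
# query, task_id, return_num) and may need adjusting to the actual schema
# definitions in backend/schemas/v3.
#
#   # 1. Create a task by mapping a site
#   curl -X POST http://localhost:8000/api/v3/tasks \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com"}'
#
#   # 2. Start the concurrent background crawl for task 1
#   curl -X POST http://localhost:8000/api/v3/tasks/1/run \
#        -H "Content-Type: application/json" \
#        -d '{"batch_size": 5}'
#
#   # 3. Poll the task's real-time status
#   curl http://localhost:8000/api/v3/tasks/1
#
#   # 4. Hybrid search with rerank over the crawled knowledge base
#   curl -X POST http://localhost:8000/api/v3/search \
#        -H "Content-Type: application/json" \
#        -d '{"query": "example query", "task_id": 1, "return_num": 5}'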