# backend/main.py
#
# FastAPI entry point for the Wiki Crawler service.
# Exposes two routers:
#   - /api/v1 : low-level manual endpoints (client drives the crawl loop)
#   - /api/v2 : automated workflow endpoints (server drives mapping,
#               background processing and text-to-vector search)
#
# All endpoints return the project's uniform envelope via make_response(code, msg, data):
#   code 1 = success, 0 = error, 2 = rejected input (empty vector).
from fastapi import FastAPI, APIRouter, BackgroundTasks

# NOTE: keep these import paths in sync with the actual module filenames
# (e.g. if the workflow module is named workflow.py, import from .services.workflow).
from .services.crawler_sql_service import crawler_sql_service
from .services.automated_crawler import workflow
from .schemas import (
    RegisterRequest,
    PendingRequest,
    SaveResultsRequest,
    AddUrlsRequest,
    SearchRequest,
    AutoMapRequest,
    AutoProcessRequest,
    TextSearchRequest,
)
from .utils import make_response

app = FastAPI(title="Wiki Crawler API")

# ==========================================
# V1 Router: low-level manual-control API
# ==========================================
router_v1 = APIRouter()


@router_v1.post("/register")
async def register(req: RegisterRequest):
    """Register a crawl task for a URL.

    The service returns e.g. {'task_id': 1, 'is_new_task': True, 'msg': '...'};
    'msg' is popped out to become the envelope message, the rest is the payload.
    """
    try:
        res = crawler_sql_service.register_task(req.url)
        return make_response(1, res.pop("msg", "Success"), res)
    except Exception as e:
        # Top-level boundary: convert any failure into an error envelope.
        return make_response(0, str(e))


@router_v1.post("/add_urls")
async def add_urls(req: AddUrlsRequest):
    """Append URLs to an existing task's queue.

    Expects req.urls_obj to contain a 'urls' key (KeyError becomes an
    error envelope via the except branch).
    """
    try:
        urls = req.urls_obj["urls"]
        res = crawler_sql_service.add_urls(req.task_id, urls=urls)
        return make_response(1, res.pop("msg", "Success"), res)
    except Exception as e:
        return make_response(0, str(e))


@router_v1.post("/pending_urls")
async def pending_urls(req: PendingRequest):
    """Fetch up to req.limit pending URLs for a task.

    An empty queue is still a success: the service reports it through
    msg (e.g. "Queue is empty").
    """
    try:
        res = crawler_sql_service.get_pending_urls(req.task_id, req.limit)
        return make_response(1, res.pop("msg", "Success"), res)
    except Exception as e:
        return make_response(0, str(e))


@router_v1.post("/save_results")
async def save_results(req: SaveResultsRequest):
    """Persist crawl results for a task."""
    try:
        res = crawler_sql_service.save_results(req.task_id, req.results)
        return make_response(1, res.pop("msg", "Success"), res)
    except Exception as e:
        return make_response(0, str(e))


@router_v1.post("/search")
async def search_v1(req: SearchRequest):
    """V1 search: the client supplies the embedding vector itself."""
    try:
        vector = req.query_embedding["vector"]
        if not vector:
            # code 2: input rejected rather than server error.
            return make_response(2, "Vector is empty", None)
        # Service returns {'results': [...], 'msg': 'Found ...'}.
        res = crawler_sql_service.search_knowledge(
            query_embedding=vector,
            task_id=req.task_id,
            limit=req.limit,
        )
        return make_response(1, res.pop("msg", "Search Done"), res)
    except Exception as e:
        return make_response(0, str(e))


# ==========================================
# V2 Router: automated workflow API
# ==========================================
router_v2 = APIRouter()


@router_v2.post("/auto/map")
async def auto_map(req: AutoMapRequest):
    """[Synchronous] Take a site's root URL, run the Firecrawl Map step
    and ingest the discovered URLs into the task queue.
    """
    try:
        # Workflow returns {'task_id': ..., 'msg': 'Task mapped...', ...}.
        res = workflow.map_and_ingest(req.url)
        return make_response(1, res.pop("msg", "Mapping Started"), res)
    except Exception as e:
        return make_response(0, str(e))


@router_v2.post("/auto/process")
async def auto_process(req: AutoProcessRequest, background_tasks: BackgroundTasks):
    """[Asynchronous] Kick off a background task that consumes the queue:
    crawl -> embed -> persist.

    Because the heavy work runs after the response is sent, the service's
    own result message is unavailable here; a generic acknowledgement is
    returned instead.
    """
    try:
        background_tasks.add_task(
            workflow.process_task_queue, req.task_id, req.batch_size
        )
        return make_response(
            1, "Background processing started", {"task_id": req.task_id}
        )
    except Exception as e:
        return make_response(0, str(e))


@router_v2.post("/search")
async def search_v2(req: TextSearchRequest):
    """[Smart] Natural-language search: the backend converts the query text
    to an embedding before searching.
    """
    try:
        # Workflow returns {'results': [...], 'msg': '...'}.
        res = workflow.search_with_embedding(req.query, req.task_id, req.limit)
        return make_response(1, res.pop("msg", "Search Success"), res)
    except Exception as e:
        return make_response(0, f"Search Failed: {str(e)}")


# ==========================================
# Mount routers
# ==========================================
app.include_router(router_v1, prefix="/api/v1", tags=["V1 Manual API"])
app.include_router(router_v2, prefix="/api/v2", tags=["V2 Automated Workflow"])

if __name__ == "__main__":
    import uvicorn

    # FIX: `reload=True` was dropped — uvicorn cannot hot-reload when given an
    # application *object*; it only warns and disables reload. To use reload,
    # run `uvicorn backend.main:app --reload` from the project root instead.
    # (NOTE(review): this module uses relative imports, so running the file
    # directly as a script will fail anyway — confirm the intended launch mode.)
    uvicorn.run(app, host="0.0.0.0", port=8000)