120 lines
4.3 KiB
Python
120 lines
4.3 KiB
Python
# backend/main.py
|
||
from fastapi import FastAPI, APIRouter, BackgroundTasks
|
||
from .service import crawler_sql_service
|
||
from .workflow import workflow
|
||
from .schemas import (
|
||
RegisterRequest, PendingRequest, SaveResultsRequest, AddUrlsRequest, SearchRequest,
|
||
AutoMapRequest, AutoProcessRequest, TextSearchRequest
|
||
)
|
||
from .utils import make_response
|
||
|
||
# FastAPI application instance; the V1 and V2 routers are mounted on it
# near the end of this module.
app = FastAPI(title="Wiki Crawler API")
|
||
|
||
# ==========================================
|
||
# V1 Router: original low-level endpoints (Manual Control)
|
||
# ==========================================
|
||
# Router that collects the V1 endpoints registered with @router_v1.post(...).
router_v1 = APIRouter()
|
||
|
||
@router_v1.post("/register")
async def register(req: RegisterRequest):
    """Register a task for ``req.url``.

    Delegates to ``crawler_sql_service.register_task`` and wraps the result
    with ``make_response``: code 1 / "Success" when the call completes, or
    code 0 carrying the exception text when anything raises.
    """
    try:
        task_info = crawler_sql_service.register_task(req.url)
        response = make_response(1, "Success", task_info)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
@router_v1.post("/add_urls")
async def add_urls(req: AddUrlsRequest):
    """Add the URLs found under ``req.urls_obj["urls"]`` to task ``req.task_id``.

    Returns ``make_response(1, "Success", data)`` with whatever
    ``crawler_sql_service.add_urls`` returns, or ``make_response(0, <error>)``
    if any step raises (including a missing ``"urls"`` key).
    """
    try:
        url_list = req.urls_obj["urls"]
        outcome = crawler_sql_service.add_urls(req.task_id, urls=url_list)
        response = make_response(1, "Success", outcome)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
@router_v1.post("/pending_urls")
async def pending_urls(req: PendingRequest):
    """Fetch pending URLs for task ``req.task_id``, passing ``req.limit`` through.

    The response code is 1 in both non-error cases; the message is
    "Success" when the ``"urls"`` entry of the service result is truthy and
    "Queue Empty" otherwise. Any exception yields code 0 with its text.
    """
    try:
        batch = crawler_sql_service.get_pending_urls(req.task_id, req.limit)
        if batch["urls"]:
            status_text = "Success"
        else:
            status_text = "Queue Empty"
        response = make_response(1, status_text, batch)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
@router_v1.post("/save_results")
async def save_results(req: SaveResultsRequest):
    """Hand ``req.results`` for task ``req.task_id`` to the SQL service.

    Returns ``make_response(1, "Success", data)`` with the value returned by
    ``crawler_sql_service.save_results``, or ``make_response(0, <error>)``
    when the call raises.
    """
    try:
        saved = crawler_sql_service.save_results(req.task_id, req.results)
        response = make_response(1, "Success", saved)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
@router_v1.post("/search")
async def search_v1(req: SearchRequest):
    """V1 search: the client must supply the query vector itself.

    Reads the vector from ``req.query_embedding["vector"]``. A falsy vector
    produces code 2 / "Vector is empty"; otherwise the vector, ``req.task_id``
    and ``req.limit`` are passed to ``crawler_sql_service.search_knowledge``
    and the result is returned with code 1 / "Search Done". Any exception
    yields code 0 with its text.
    """
    try:
        embedding = req.query_embedding["vector"]
        # NOTE: confirm the vector dimension used by your database.
        # TextEmbedding V3 may be 1024 while V2 is 1536; make it match your
        # PGVector configuration.
        if embedding:
            hits = crawler_sql_service.search_knowledge(
                query_embedding=embedding,
                task_id=req.task_id,
                limit=req.limit,
            )
            response = make_response(1, "Search Done", hits)
        else:
            response = make_response(2, "Vector is empty", None)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
|
||
# ==========================================
|
||
# V2 Router: Automated Workflow
|
||
# ==========================================
|
||
# Router that collects the V2 endpoints registered with @router_v2.post(...).
router_v2 = APIRouter()
|
||
|
||
@router_v2.post("/auto/map")
async def auto_map(req: AutoMapRequest, background_tasks: BackgroundTasks):
    """Map a site from its homepage URL and store the result.

    Passes ``req.url`` to ``workflow.map_and_ingest`` (per the original note,
    this calls Firecrawl Map and writes the result to the database) and
    returns its result with code 1 / "Mapping Started", or code 0 with the
    exception text on failure.

    The call runs inline so that the task id is available in the response;
    ``background_tasks`` is accepted but not used here.
    """
    # The mapping could be moved to background_tasks if it turns out to be
    # slow; it is called synchronously here because map is usually fast and
    # the caller gets the task id straight away.
    # NOTE(review): the call is not awaited, so it runs on the event loop
    # for its full duration -- confirm that this is acceptable.
    try:
        mapping = workflow.map_and_ingest(req.url)
        response = make_response(1, "Mapping Started", mapping)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, str(exc))
    return response
|
||
|
||
@router_v2.post("/auto/process")
async def auto_process(req: AutoProcessRequest, background_tasks: BackgroundTasks):
    """Schedule background processing of a task's queue.

    Registers ``workflow.process_task_queue(req.task_id, req.batch_size)`` as
    a background task (per the original note: consume queue -> scrape ->
    embedding -> store) and immediately returns code 1 with the task id.
    No exception handling is applied here.
    """
    # The time-consuming work is deferred to a background task.
    worker = workflow.process_task_queue
    background_tasks.add_task(worker, req.task_id, req.batch_size)

    payload = {"task_id": req.task_id}
    return make_response(1, "Processing started in background", payload)
|
||
|
||
@router_v2.post("/search")
async def search_v2(req: TextSearchRequest):
    """Search from natural-language text; the backend produces the embedding.

    Passes ``req.query``, ``req.task_id`` and ``req.limit`` to
    ``workflow.search_with_embedding`` and returns its result with code 1 /
    "Search Success". Any exception yields code 0 with the message
    "Search Failed: <error text>".
    """
    try:
        hits = workflow.search_with_embedding(req.query, req.task_id, req.limit)
        response = make_response(1, "Search Success", hits)
    except Exception as exc:
        # Failures are reported in the response body instead of propagating.
        response = make_response(0, f"Search Failed: {exc!s}")
    return response
|
||
|
||
|
||
# ==========================================
|
||
# Mount routers
|
||
# ==========================================
|
||
# Expose the V1 endpoints under the /api/v1 prefix.
app.include_router(router_v1, prefix="/api/v1", tags=["V1 Manual API"])
|
||
# Expose the V2 endpoints under the /api/v2 prefix.
app.include_router(router_v2, prefix="/api/v2", tags=["V2 Automated Workflow"])
|
||
|
||
if __name__ == "__main__":
    import uvicorn

    # Development entry point: serve the app on all interfaces, port 8000,
    # with auto-reload.
    #
    # uvicorn only honours reload=True when the application is given as an
    # import string; passing the app object makes it log a warning and exit
    # without serving. The module path below follows the "backend/main.py"
    # location stated at the top of this file.
    # NOTE(review): this module uses relative imports, so it presumably has
    # to be started as `python -m backend.main` from the directory that
    # contains the `backend` package -- confirm the package name.
    uvicorn.run("backend.main:app", host="0.0.0.0", port=8000, reload=True)