# backend/main.py
from fastapi import FastAPI, APIRouter, BackgroundTasks
from .service import crawler_sql_service
from .workflow import workflow
from .schemas import (
    RegisterRequest, PendingRequest, SaveResultsRequest, AddUrlsRequest, SearchRequest,
    AutoMapRequest, AutoProcessRequest, TextSearchRequest
)
from .utils import make_response
app = FastAPI(title="Wiki Crawler API")
# ==========================================
# V1 Router: original low-level endpoints (Manual Control)
# ==========================================
router_v1 = APIRouter()
@router_v1.post("/register")
async def register(req: RegisterRequest):
    try:
        data = crawler_sql_service.register_task(req.url)
        return make_response(1, "Success", data)
    except Exception as e:
        return make_response(0, str(e))

@router_v1.post("/add_urls")
async def add_urls(req: AddUrlsRequest):
    try:
        # The request wraps the URL list as {"urls": [...]}
        urls = req.urls_obj["urls"]
        data = crawler_sql_service.add_urls(req.task_id, urls=urls)
        return make_response(1, "Success", data)
    except Exception as e:
        return make_response(0, str(e))

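# Example request body for /api/v1/add_urls (sketch only; JSON keys follow the
# attribute names used in the handler above -- adjust to your schemas and the
# actual task_id type):
# {
#   "task_id": "<task_id>",
#   "urls_obj": {"urls": ["https://example.org/wiki/PageA", "https://example.org/wiki/PageB"]}
# }
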
@router_v1.post("/pending_urls")
async def pending_urls(req: PendingRequest):
    try:
        data = crawler_sql_service.get_pending_urls(req.task_id, req.limit)
        msg = "Success" if data["urls"] else "Queue Empty"
        return make_response(1, msg, data)
    except Exception as e:
        return make_response(0, str(e))

@router_v1.post("/save_results")
async def save_results(req: SaveResultsRequest):
    try:
        data = crawler_sql_service.save_results(req.task_id, req.results)
        return make_response(1, "Success", data)
    except Exception as e:
        return make_response(0, str(e))

@router_v1.post("/search")
async def search_v1(req: SearchRequest):
"""V1 搜索:需要客户端自己传向量"""
try:
vector = req.query_embedding['vector']
# 注意这里需要确认你数据库的向量维度。TextEmbedding V3 可能是 1024V2 是 1536。
# 请根据你的 PGVector 设置进行匹配。
if not vector:
return make_response(2, "Vector is empty", None)
data = crawler_sql_service.search_knowledge(
query_embedding=vector,
task_id=req.task_id,
limit=req.limit
)
return make_response(1, "Search Done", data)
except Exception as e:
return make_response(0, str(e))
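# Example request body for /api/v1/search (sketch only; the client supplies the
# embedding, and its length must match the PGVector column dimension):
# {
#   "task_id": "<task_id>",
#   "limit": 5,
#   "query_embedding": {"vector": [0.013, -0.072, ...]}   # e.g. 1024 floats for text-embedding-v3
# }
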
# ==========================================
# V2 Router: Automated Workflow
# ==========================================
router_v2 = APIRouter()
@router_v2.post("/auto/map")
async def auto_map(req: AutoMapRequest, background_tasks: BackgroundTasks):
"""
[异步] 输入首页 URL自动调用 Firecrawl Map 并入库
"""
# 也可以放入 background_tasks但 map 通常比较快这里演示同步返回任务ID
try:
# 为了不阻塞主线程,如果 map 很慢,建议放入 background_tasks
# 这里为了能立刻看到 task_id先同步调用 (Firecrawl Map 比较快)
data = workflow.map_and_ingest(req.url)
return make_response(1, "Mapping Started", data)
except Exception as e:
return make_response(0, str(e))
@router_v2.post("/auto/process")
async def auto_process(req: AutoProcessRequest, background_tasks: BackgroundTasks):
"""
[异步] 触发后台任务:消费队列 -> 抓取 -> Embedding -> 入库
"""
# 将耗时操作放入后台任务
background_tasks.add_task(workflow.process_task_queue, req.task_id, req.batch_size)
return make_response(1, "Processing started in background", {"task_id": req.task_id})
@router_v2.post("/search")
async def search_v2(req: TextSearchRequest):
"""
[智能] 输入自然语言文本 -> 后端转向量 -> 搜索
"""
try:
data = workflow.search_with_embedding(req.query, req.task_id, req.limit)
return make_response(1, "Search Success", data)
except Exception as e:
return make_response(0, f"Search Failed: {str(e)}")
# ==========================================
# Mount routers
# ==========================================
app.include_router(router_v1, prefix="/api/v1", tags=["V1 Manual API"])
app.include_router(router_v2, prefix="/api/v2", tags=["V2 Automated Workflow"])
if __name__ == "__main__":
    import uvicorn
    # reload=True requires an import string, not an app object
    # (run from the project root so "backend.main" is importable).
    uvicorn.run("backend.main:app", host="0.0.0.0", port=8000, reload=True)
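
# End-to-end usage sketch for the V2 workflow (hypothetical values; the task_id
# is returned by /auto/map, and batch_size/limit defaults depend on the schemas):
#
#   curl -X POST localhost:8000/api/v2/auto/map \
#        -H "Content-Type: application/json" -d '{"url": "https://example.org/wiki"}'
#
#   curl -X POST localhost:8000/api/v2/auto/process \
#        -H "Content-Type: application/json" -d '{"task_id": "<task_id>", "batch_size": 10}'
#
#   curl -X POST localhost:8000/api/v2/search \
#        -H "Content-Type: application/json" -d '{"task_id": "<task_id>", "query": "what is ...", "limit": 5}'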