Files
wiki_crawler/backend/database.py

32 lines
1.3 KiB
Python
Raw Normal View History

2025-12-20 17:14:34 +08:00
from sqlalchemy import create_engine, MetaData, Table, event
from sqlalchemy.exc import SQLAlchemyError
from pgvector.sqlalchemy import Vector  # must be imported so reflection can map the "vector" type

from .config import settings
class Database:
    """Holds the engine and the reflected crawler tables.

    Before reflecting, the pgvector ``vector`` type is registered with the
    engine dialect so that columns of that database type (e.g.
    ``knowledge_chunks.embedding``) are recognized and mapped to
    :class:`pgvector.sqlalchemy.Vector` instead of being treated as unknown.
    """

    def __init__(self):
        # 1. Create the engine. pool_pre_ping revalidates pooled connections
        #    before each checkout so stale/dropped connections are replaced.
        self.engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True)

        # 2. [Core fix] Manually register the "vector" type so reflection can
        #    recognize it: this tells SQLAlchemy that whenever it sees a type
        #    named "vector" in the database, it should use pgvector's Vector
        #    class to handle it.
        self.engine.dialect.ischema_names['vector'] = Vector

        self.metadata = MetaData()
        # Reflected Table objects; stay None if reflection fails below.
        self.tasks = None
        self.queue = None
        self.chunks = None

        self._reflect_tables()

    def _reflect_tables(self):
        """Reflect the crawler tables from the live database schema.

        Best-effort: on a database/SQLAlchemy error the failure is reported
        and the table attributes remain ``None`` — callers must cope with
        that. Non-database errors (programming bugs) propagate.
        """
        try:
            # Autoload the table structure from the database. Because
            # "vector" was registered in ischema_names above,
            # chunks.c.embedding is now correctly reflected as Vector.
            self.tasks = Table('crawl_tasks', self.metadata, autoload_with=self.engine)
            self.queue = Table('crawl_queue', self.metadata, autoload_with=self.engine)
            self.chunks = Table('knowledge_chunks', self.metadata, autoload_with=self.engine)
        except SQLAlchemyError as e:
            # Narrowed from a bare `except Exception`: only expected
            # database-layer failures are swallowed; anything else surfaces.
            print(f"❌ 数据库表加载失败: {e}")
2025-12-20 17:14:34 +08:00
# Global singleton — the shared Database instance for the application.
# NOTE: constructing it here connects/reflects at import time.
db_instance = Database()