2025-12-20 17:14:34 +08:00
|
|
|
|
from sqlalchemy import create_engine, MetaData, Table, event
|
|
|
|
|
|
from pgvector.sqlalchemy import Vector # 必须导入这个
|
2025-12-20 17:08:54 +08:00
|
|
|
|
from .config import settings
|
|
|
|
|
|
|
|
|
|
|
|
class Database:
    """Own the SQLAlchemy engine, metadata and reflected table handles.

    Attributes assigned in ``__init__``:
        engine: Engine created from ``settings.DATABASE_URL``.
        metadata: ``MetaData`` collection that the tables are reflected into.
        tasks, queue, chunks: Reflected ``crawl_tasks``, ``crawl_queue`` and
            ``knowledge_chunks`` tables. A table that could not be reflected
            is left as ``None``.
    """

    def __init__(self):
        """Create the engine, register the ``vector`` type, reflect tables."""
        # 1. Create the engine.
        self.engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True)

        # 2. [Core fix] Manually register the "vector" type so reflection can
        #    recognise it: when a column type named "vector" is seen in the
        #    database, SQLAlchemy should handle it with pgvector's Vector.
        type_registry = self.engine.dialect.ischema_names
        type_registry['vector'] = Vector

        self.metadata = MetaData()

        # Table handles start empty and are filled in by _reflect_tables().
        self.tasks = self.queue = self.chunks = None

        self._reflect_tables()

    def _reflect_tables(self):
        """Load the table definitions from the live database.

        Tables are reflected in the order tasks, queue, chunks. The first
        failure is printed and swallowed; handles not yet assigned at that
        point keep their previous value.
        """
        # (attribute on self, table name in the database)
        reflected = (
            ('tasks', 'crawl_tasks'),
            ('queue', 'crawl_queue'),
            ('chunks', 'knowledge_chunks'),
        )
        try:
            # With "vector" registered in ischema_names (see __init__), the
            # embedding column of the chunks table is expected to reflect as
            # a Vector type.
            for attr_name, table_name in reflected:
                table = Table(
                    table_name,
                    self.metadata,
                    autoload_with=self.engine,
                )
                setattr(self, attr_name, table)
        except Exception as e:
            print(f"❌ 数据库表加载失败: {e}")
|
|
|
|
|
|
|
2025-12-20 17:14:34 +08:00
|
|
|
|
# Global singleton
|
2025-12-20 17:08:54 +08:00
|
|
|
|
# Instantiated at import time: Database.__init__ builds the engine and
# attempts table reflection as soon as this module is first imported.
db_instance = Database()
|