# wiki_crawler/backend/core/database.py
from sqlalchemy import create_engine, MetaData, Table
from pgvector.sqlalchemy import Vector
from .config import settings
class Database:
    """Database singleton.

    Builds the SQLAlchemy engine (with connection-pool health checks)
    and reflects the existing table definitions from the live database.
    """

    # (attribute name, database table name) pairs reflected at startup.
    _TABLES = (
        ('tasks', 'crawl_tasks'),
        ('queue', 'crawl_queue'),
        ('chunks', 'knowledge_chunks'),
    )

    def __init__(self):
        # pool_pre_ping=True revalidates pooled connections before use,
        # avoiding errors from connections dropped while idle.
        self.engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True)

        # Teach SQLAlchemy's reflection about the pgvector 'vector'
        # column type so autoload does not fail on vector columns.
        self.engine.dialect.ischema_names['vector'] = Vector

        self.metadata = MetaData()
        # Reflected Table handles; filled in by _reflect_tables()
        # (remain None if reflection fails).
        self.tasks = None
        self.queue = None
        self.chunks = None

        self._reflect_tables()

    def _reflect_tables(self):
        """Automatically load table definitions from the database."""
        try:
            for attr, table_name in self._TABLES:
                # autoload_with queries the DB metadata and fills in
                # the Column information for us.
                setattr(self, attr, Table(table_name, self.metadata, autoload_with=self.engine))
            print("[INFO] Database tables reflected successfully.")
        except Exception as e:
            # Best-effort: log the failure and continue with None tables.
            print(f"[ERROR] Failed to reflect tables: {e}")
# Module-level singleton: the shared database instance for the application.
db = Database()