42 lines
1.6 KiB
Python
42 lines
1.6 KiB
Python
from sqlalchemy import create_engine, MetaData, Table
|
|
from pgvector.sqlalchemy import Vector
|
|
from .config import settings
|
|
import logging
|
|
|
|
# Module-level logger dedicated to this file.
# __name__ resolves to this module's dotted import path, so every record
# emitted through this logger carries that path as its logger name.
logger = logging.getLogger(__name__)
|
|
class Database:
    """Database access object owning the engine and the reflected tables.

    Creating an instance builds the SQLAlchemy engine from
    ``settings.DATABASE_URL`` and then reflects the existing
    ``crawl_tasks``, ``crawl_queue`` and ``knowledge_chunks`` tables.

    The class is intended to be used as one shared instance; nothing in the
    class itself enforces that.

    Attributes:
        engine: Engine created with ``pool_pre_ping=True``.
        metadata: ``MetaData`` collection the reflected tables register in.
        tasks: Reflected ``crawl_tasks`` table, or ``None`` if it could not
            be reflected.
        queue: Reflected ``crawl_queue`` table, or ``None`` if it could not
            be reflected.
        chunks: Reflected ``knowledge_chunks`` table, or ``None`` if it could
            not be reflected.
    """

    def __init__(self) -> None:
        """Create the engine, register the pgvector type and reflect tables."""
        # 1. Create the engine.
        # pool_pre_ping=True guards against connections that were dropped
        # after sitting idle for a long time.
        self.engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True)

        # 2. Register the pgvector type so that SQLAlchemy's reflection can
        # recognise columns declared as 'vector' in the database.
        self.engine.dialect.ischema_names['vector'] = Vector

        self.metadata = MetaData()

        # Placeholders; each one keeps its None value if reflection fails
        # before reaching the corresponding table.
        self.tasks = None
        self.queue = None
        self.chunks = None

        self._reflect_tables()

    def _reflect_tables(self) -> None:
        """Load the table definitions from the live database.

        Reflection is best-effort: any failure is logged together with its
        traceback and then swallowed, so construction never raises from here.
        Tables that were not reflected before the failure remain ``None``.
        """
        try:
            # autoload_with queries the database catalog and fills in the
            # Column definitions automatically.
            self.tasks = Table('crawl_tasks', self.metadata, autoload_with=self.engine)
            self.queue = Table('crawl_queue', self.metadata, autoload_with=self.engine)
            self.chunks = Table('knowledge_chunks', self.metadata, autoload_with=self.engine)
        except Exception as exc:
            # Deliberately broad: a reflection failure must not abort
            # construction. logger.exception keeps the traceback, which a
            # plain logger.error of the message alone would discard.
            logger.exception("Failed to reflect tables: %s", exc)
        else:
            logger.info("Database tables reflected successfully.")
|
|
|
# Global database instance.
# NOTE: this is constructed at import time, so importing this module creates
# the engine and attempts table reflection.
db = Database()