Files
wiki_crawler/scripts/update_sql.py

82 lines
2.9 KiB
Python
Raw Normal View History

2026-01-13 01:37:26 +08:00
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import create_engine, text
from backend.core.config import settings
def update_database_schema():
    """Apply the non-destructive schema upgrade to ``knowledge_chunks``.

    Executes a fixed list of PostgreSQL statements that add the ``meta_info``
    and ``content_tsvector`` columns, create GIN indexes on them, install a
    trigger that fills ``content_tsvector`` on insert/update, and backfill
    rows whose ``content_tsvector`` is still NULL. Every statement is guarded
    (``IF NOT EXISTS`` / ``CREATE OR REPLACE`` / ``WHERE ... IS NULL``), so
    the script can be re-run.

    Each statement runs in its own transaction. Running them all inside one
    transaction while swallowing errors does not work on PostgreSQL: the
    first failure aborts the transaction, every later statement is rejected,
    and the final commit turns into a rollback, so nothing is applied even
    though the script reports success.

    Statement failures are printed and do not raise; the final message
    reports whether every statement succeeded. Errors while opening the
    connection are not caught here.
    """
    print(f"🔌 连接数据库: {settings.DB_NAME}...")
    engine = create_engine(settings.DATABASE_URL)

    commands = [
        # 1. Safely add the meta_info column (existing rows get the '{}' default).
        """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='knowledge_chunks' AND column_name='meta_info') THEN
                ALTER TABLE knowledge_chunks ADD COLUMN meta_info JSONB DEFAULT '{}';
                RAISE NOTICE '已添加 meta_info 列';
            END IF;
        END $$;
        """,
        # 2. Safely add the content_tsvector column.
        """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='knowledge_chunks' AND column_name='content_tsvector') THEN
                ALTER TABLE knowledge_chunks ADD COLUMN content_tsvector TSVECTOR;
                RAISE NOTICE '已添加 content_tsvector 列';
            END IF;
        END $$;
        """,
        # 3. Create the indexes (does not modify existing rows).
        "CREATE INDEX IF NOT EXISTS idx_chunks_meta ON knowledge_chunks USING GIN (meta_info);",
        "CREATE INDEX IF NOT EXISTS idx_chunks_tsvector ON knowledge_chunks USING GIN (content_tsvector);",
        # 4. Create the trigger function (used for rows written from now on).
        """
        CREATE OR REPLACE FUNCTION chunks_tsvector_trigger() RETURNS trigger AS $$
        BEGIN
            new.content_tsvector := to_tsvector('english', coalesce(new.title, '') || ' ' || new.content);
            return new;
        END
        $$ LANGUAGE plpgsql;
        """,
        # 5. Bind the trigger to the table.
        """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'tsvectorupdate') THEN
                CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE
                ON knowledge_chunks FOR EACH ROW EXECUTE PROCEDURE chunks_tsvector_trigger();
            END IF;
        END $$;
        """,
        # 6. Backfill old rows so data stored before this upgrade
        #    (e.g. task_id=6) also gets a keyword-search vector.
        """
        UPDATE knowledge_chunks
        SET content_tsvector = to_tsvector('english', coalesce(title, '') || ' ' || content)
        WHERE content_tsvector IS NULL;
        """,
    ]

    # 1-based positions (within `commands`) of the statements that failed.
    failed = []

    with engine.connect() as conn:
        for position, cmd in enumerate(commands, start=1):
            try:
                # One transaction per statement: a failure rolls back only
                # this statement and leaves the connection usable for the
                # next one.
                with conn.begin():
                    conn.execute(text(cmd))
            except Exception as e:
                # Deliberately best-effort: report and continue with the
                # remaining statements instead of aborting the whole run.
                failed.append(position)
                print(f"⚠️ 执行警告 (通常可忽略): {e}")

    if failed:
        failed_list = ", ".join(str(position) for position in failed)
        print(
            f"❌ 数据库结构升级未全部成功: 第 {failed_list} 条语句执行失败 "
            f"(共 {len(commands)} 条),请检查上方错误信息。"
        )
    else:
        print("✅ 数据库结构升级完成!旧数据已保留并兼容。")
# Script entry point: run the schema upgrade only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    update_database_schema()