Files
wiki_crawler/scripts/test_env.py
2025-12-20 17:08:54 +08:00

148 lines
5.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import sys
import time
import uuid
def main():
    """Run the environment self-check and return a report dictionary.

    The check proceeds in five stages:

    1. Import the base libraries (numpy, requests, langchain,
       langchain_community, pydantic) and record their versions.
    2. Build a mock embedding model that returns random vectors.
    3. Store three documents in an in-memory Qdrant collection and run one
       similarity search against it.
    4. Try to import the PostgreSQL, Milvus, Redis and Pinecone drivers
       together with their LangChain vector-store wrappers.
    5. Derive the overall verdict from stages 3 and 4.

    Returns:
        dict: the report, with these keys:

        * ``status`` - ``"success"``, ``"warning"`` or ``"error"``.
        * ``steps`` - list of human-readable messages for completed stages.
        * ``env_info`` - interpreter and library version strings.
        * ``vector_pipeline_test`` - outcome of the Qdrant round-trip.
        * ``drivers_status`` - per-driver load result (absent if an earlier
          stage aborted the run).
        * ``message`` - summary of the final verdict.

    Any ``Exception`` raised outside the guarded stages (for example a
    missing base library) is caught and reported as ``status == "error"``
    instead of propagating to the caller.
    """
    report = {
        "status": "running",
        "steps": [],
        "env_info": {},
        "vector_pipeline_test": "pending",
    }

    try:
        # ==========================================
        # 1. Environment self-check
        # ==========================================
        # requests and langchain_community are imported only to prove they
        # are installed; a failure here aborts the run via the outer handler.
        import numpy
        import requests
        import langchain
        import langchain_community
        import pydantic

        report["env_info"] = {
            "python": sys.version.split()[0],
            "numpy": numpy.__version__,
            "langchain": langchain.__version__,
            "pydantic": pydantic.__version__,
        }
        report["steps"].append("✅ 基础环境库加载成功")

        # ==========================================
        # 2. Define mock components (simulated embedding generation)
        # ==========================================
        from langchain_core.embeddings import Embeddings
        from langchain_core.documents import Document

        class MockEmbeddings(Embeddings):
            """Produce random vectors of a fixed dimension in place of a real model."""

            def __init__(self, dim=1536):
                self.dim = dim

            def embed_documents(self, texts):
                # One random vector per input text.
                return [numpy.random.rand(self.dim).tolist() for _ in texts]

            def embed_query(self, text):
                # A single random vector for the query.
                return numpy.random.rand(self.dim).tolist()

        embeddings = MockEmbeddings(dim=768)
        report["steps"].append("✅ 向量嵌入模型 (Mock) 初始化成功")

        # ==========================================
        # 3. End-to-end test: full pipeline on in-memory Qdrant
        # ==========================================
        # Exercises the data flow between LangChain and the underlying
        # libraries. A failure here is recorded in the report, not raised.
        try:
            from langchain_community.vectorstores import Qdrant

            # Dummy documents to index.
            docs = [
                Document(page_content="Hello Dify", metadata={"id": 1}),
                Document(page_content="Vector Database Test", metadata={"id": 2}),
                Document(page_content="Conflict Check", metadata={"id": 3}),
            ]

            # Build the collection in memory; location=":memory:" means no
            # external Qdrant server is contacted.
            db = Qdrant.from_documents(
                docs,
                embeddings,
                location=":memory:",
                collection_name="test_collection",
            )

            # Run a vector similarity search against the fresh collection.
            results = db.similarity_search("Hello", k=1)
            if results:
                report["vector_pipeline_test"] = "✅ Success (Qdrant In-Memory)"
                report["steps"].append(
                    f"✅ 向量存取测试通过: 检索到 '{results[0].page_content}'"
                )
            else:
                report["vector_pipeline_test"] = "❌ Failed (No results)"
        except Exception as e:
            # Reaching this branch suggests a deep dependency conflict.
            report["vector_pipeline_test"] = f"❌ Failed: {str(e)}"

        # ==========================================
        # 4. Import test for the other database drivers
        # ==========================================
        # Only checks that each driver and its LangChain wrapper import
        # cleanly; no connection is attempted.
        drivers = {}

        # [PostgreSQL / PGVector]
        try:
            import psycopg2
            from langchain_community.vectorstores import PGVector
            drivers["postgres"] = "✅ Loaded"
        except Exception as e:
            drivers["postgres"] = f"❌ Error: {str(e)}"

        # [Milvus]
        try:
            import pymilvus
            from langchain_community.vectorstores import Milvus
            drivers["milvus"] = "✅ Loaded"
        except Exception as e:
            drivers["milvus"] = f"❌ Error: {str(e)}"

        # [Redis]
        try:
            import redis
            from langchain_community.vectorstores import Redis
            drivers["redis"] = "✅ Loaded"
        except Exception as e:
            drivers["redis"] = f"❌ Error: {str(e)}"

        # [Pinecone]
        try:
            import pinecone  # new-style package name
            from langchain_community.vectorstores import Pinecone
            drivers["pinecone"] = "✅ Loaded"
        except Exception as e:
            drivers["pinecone"] = f"❌ Error: {str(e)}"

        report["drivers_status"] = drivers

        # ==========================================
        # 5. Final verdict
        # ==========================================
        # A driver counts as failed when its status carries the "❌" marker
        # written above. The previous check tested for the empty string,
        # which is a substring of every string, so "success" was unreachable.
        any_driver_failed = any("❌" in state for state in drivers.values())
        pipeline_ok = "Success" in report["vector_pipeline_test"]

        if not any_driver_failed and pipeline_ok:
            report["status"] = "success"
            report["message"] = "🎉 完美验证!所有库已就绪,且向量计算逻辑运行正常。"
        else:
            report["status"] = "warning"
            report["message"] = "存在潜在问题,请检查详细信息。"
    except Exception as e:
        # Top-level boundary: turn any unexpected failure into report data.
        report["status"] = "error"
        report["message"] = f"全局错误: {str(e)}"

    return report
# Script entry point: run the self-check and print the resulting report dict.
if __name__ == "__main__":
    print(main())