# Source: web file-viewer export (148 lines, 5.3 KiB, Python).
# Last modified: 2025-12-20 17:08:54 +08:00
import sys
import time
import uuid
def main():
    """Run an environment and vector-pipeline self-check and return a report.

    The check runs in five stages:

    1. Import the core libraries and record their versions.
    2. Define a mock embedding model that emits random vectors.
    3. Build an in-memory Qdrant collection through LangChain and run one
       similarity search against it.
    4. Probe that each optional database client and its LangChain vector
       store wrapper can be imported.
    5. Derive the overall verdict from stages 3 and 4.

    Returns:
        dict: Always contains ``status`` ("success", "warning" or "error"),
        ``steps`` (list of progress messages), ``env_info`` and
        ``vector_pipeline_test``. ``message`` is added once a verdict or a
        global error has been reached, and ``drivers_status`` once stage 4
        has run.

    No exception escapes this function: any failure outside the guarded
    stages is reported as ``status == "error"``.
    """
    # Local import: only the driver probe in stage 4 needs it.
    import importlib

    report = {
        "status": "running",
        "steps": [],
        "env_info": {},
        "vector_pipeline_test": "pending",
    }
    try:
        # ==========================================
        # 1. Environment self-check
        # ==========================================
        import numpy
        import requests  # noqa: F401 -- imported only to prove it loads
        import langchain
        import langchain_community  # noqa: F401 -- imported only to prove it loads
        import pydantic

        report["env_info"] = {
            "python": sys.version.split()[0],
            "numpy": numpy.__version__,
            "langchain": langchain.__version__,
            "pydantic": pydantic.__version__,
        }
        report["steps"].append("✅ 基础环境库加载成功")

        # ==========================================
        # 2. Mock components (simulated embedding generation)
        # ==========================================
        from langchain_core.embeddings import Embeddings
        from langchain_core.documents import Document

        class MockEmbeddings(Embeddings):
            """Emit random fixed-dimension vectors in place of a real model."""

            def __init__(self, dim=1536):
                self.dim = dim

            def embed_documents(self, texts):
                # One random vector per input text.
                return [numpy.random.rand(self.dim).tolist() for _ in texts]

            def embed_query(self, text):
                # A single random vector for the query.
                return numpy.random.rand(self.dim).tolist()

        embeddings = MockEmbeddings(dim=768)
        report["steps"].append("✅ 向量嵌入模型 (Mock) 初始化成功")

        # ==========================================
        # 3. End-to-end test using Qdrant in in-memory mode
        # ==========================================
        # Exercises the data flow between LangChain and the underlying
        # libraries; location=":memory:" needs no external server.
        try:
            from langchain_community.vectorstores import Qdrant

            # Throwaway documents to index.
            docs = [
                Document(page_content="Hello Dify", metadata={"id": 1}),
                Document(page_content="Vector Database Test", metadata={"id": 2}),
                Document(page_content="Conflict Check", metadata={"id": 3}),
            ]
            # Key step: build the collection in memory.
            db = Qdrant.from_documents(
                docs,
                embeddings,
                location=":memory:",
                collection_name="test_collection",
            )
            # Key step: run a vector similarity search.
            results = db.similarity_search("Hello", k=1)
            if results:
                report["vector_pipeline_test"] = "✅ Success (Qdrant In-Memory)"
                report["steps"].append(
                    f"✅ 向量存取测试通过: 检索到 '{results[0].page_content}'"
                )
            else:
                report["vector_pipeline_test"] = "❌ Failed (No results)"
        except Exception as e:
            # A failure here points at a deeper dependency conflict; record
            # it and keep going so the driver probe below still runs.
            report["vector_pipeline_test"] = f"❌ Failed: {str(e)}"

        # ==========================================
        # 4. Import test for the other database drivers
        # ==========================================
        # Each probe imports the client package and then resolves the
        # matching LangChain vector store class, so a missing or broken
        # driver is reported per backend instead of aborting the run.
        driver_probes = (
            ("postgres", "psycopg2", "PGVector"),
            ("milvus", "pymilvus", "Milvus"),
            ("redis", "redis", "Redis"),
            ("pinecone", "pinecone", "Pinecone"),
        )
        drivers = {}
        for backend, client_module, store_class in driver_probes:
            try:
                importlib.import_module(client_module)
                vectorstores = importlib.import_module(
                    "langchain_community.vectorstores"
                )
                getattr(vectorstores, store_class)
                drivers[backend] = "✅ Loaded"
            except Exception as e:
                drivers[backend] = f"❌ Error: {str(e)}"
        report["drivers_status"] = drivers

        # ==========================================
        # 5. Final verdict
        # ==========================================
        # Success requires every driver to have loaded AND the pipeline test
        # to have passed. The failure marker must be tested explicitly: an
        # empty-string substring check is always true and would make the
        # success branch unreachable.
        all_drivers_loaded = all("❌" not in state for state in drivers.values())
        pipeline_passed = "Success" in report["vector_pipeline_test"]
        if all_drivers_loaded and pipeline_passed:
            report["status"] = "success"
            report["message"] = "🎉 完美验证!所有库已就绪,且向量计算逻辑运行正常。"
        else:
            report["status"] = "warning"
            report["message"] = "存在潜在问题,请检查详细信息。"
    except Exception as e:
        # Top-level boundary: turn any unexpected failure (for example a
        # missing core library in stage 1) into an error report.
        report["status"] = "error"
        report["message"] = f"全局错误: {str(e)}"
    return report
# Script entry point: run the self-check and print the resulting report dict.
if __name__ == "__main__":
    print(main())