import importlib
import sys
import time
import uuid

# Prefix used by every failed status string written into the report.
_FAIL_MARK = "❌"

# Module that exposes the LangChain community vector-store wrappers.
_VECTORSTORES_MODULE = "langchain_community.vectorstores"

# (report label, native driver module, LangChain vector-store class name)
# for every backend whose importability is probed.
_DRIVER_PROBES = (
    ("postgres", "psycopg2", "PGVector"),
    ("milvus", "pymilvus", "Milvus"),
    ("redis", "redis", "Redis"),
    ("pinecone", "pinecone", "Pinecone"),
)


def _run_vector_pipeline_test(embeddings, document_cls):
    """Run an insert + similarity-search round trip on in-memory Qdrant.

    Args:
        embeddings: Embeddings object handed to ``Qdrant.from_documents``.
        document_cls: The LangChain ``Document`` class used to build the
            sample documents.

    Returns:
        A ``(status, step)`` tuple. ``status`` is the string stored under
        ``report["vector_pipeline_test"]``; ``step`` is the message to append
        to ``report["steps"]`` on success, or ``None`` when the test failed.
    """
    # Broad catch is deliberate: any failure here (missing package, version
    # conflict, runtime error) must be reported, not abort the whole check.
    try:
        from langchain_community.vectorstores import Qdrant

        # Sample documents.
        docs = [
            document_cls(page_content="Hello Dify", metadata={"id": 1}),
            document_cls(page_content="Vector Database Test", metadata={"id": 2}),
            document_cls(page_content="Conflict Check", metadata={"id": 3}),
        ]

        # Build the collection with location=":memory:" so that no external
        # server is needed.
        db = Qdrant.from_documents(
            docs,
            embeddings,
            location=":memory:",
            collection_name="test_collection",
        )

        # Run a vector similarity search.
        results = db.similarity_search("Hello", k=1)
        if not results:
            return "❌ Failed (No results)", None

        step = f"✅ 向量存取测试通过: 检索到 '{results[0].page_content}'"
        return "✅ Success (Qdrant In-Memory)", step
    except Exception as exc:
        # A failure at this stage points at a deeper dependency conflict.
        return f"❌ Failed: {exc}", None


def _probe_drivers():
    """Try to import each database driver and its LangChain wrapper.

    Returns:
        A dict mapping each label in ``_DRIVER_PROBES`` to ``"✅ Loaded"`` or
        to ``"❌ Error: <message>"`` when either import fails.
    """
    drivers = {}
    for label, driver_module, store_class in _DRIVER_PROBES:
        # Broad catch is deliberate: the probe only records whether loading
        # worked, whatever the underlying exception type is.
        try:
            importlib.import_module(driver_module)
            getattr(importlib.import_module(_VECTORSTORES_MODULE), store_class)
            drivers[label] = "✅ Loaded"
        except Exception as exc:
            drivers[label] = f"❌ Error: {exc}"
    return drivers


def main() -> dict:
    """Run the environment self-check and return a report dict.

    The check (1) imports the base libraries and records their versions,
    (2) builds a mock embeddings object that returns random vectors,
    (3) runs a store/search round trip on in-memory Qdrant,
    (4) probes the other vector-database drivers, and
    (5) derives the overall status.

    Returns:
        A dict with the keys ``status`` (``"success"``, ``"warning"`` or
        ``"error"``), ``steps``, ``env_info`` and ``vector_pipeline_test``.
        ``message`` is set once a final status is decided, and
        ``drivers_status`` is set once the driver probes have run. Any
        exception outside the individually guarded steps is caught and
        reported as ``status == "error"``.
    """
    report = {
        "status": "running",
        "steps": [],
        "env_info": {},
        "vector_pipeline_test": "pending",
    }

    try:
        # ------------------------------------------------------------------
        # 1. Environment self-check
        # ------------------------------------------------------------------
        # requests and langchain_community are imported only to prove that
        # they load; the import order is kept so the first failure reported
        # stays the same.
        import numpy
        import requests  # noqa: F401
        import langchain
        import langchain_community  # noqa: F401
        import pydantic

        report["env_info"] = {
            "python": sys.version.split()[0],
            "numpy": numpy.__version__,
            "langchain": langchain.__version__,
            "pydantic": pydantic.__version__,
        }
        report["steps"].append("✅ 基础环境库加载成功")

        # ------------------------------------------------------------------
        # 2. Mock component (simulated vector generation)
        # ------------------------------------------------------------------
        from langchain_core.embeddings import Embeddings
        from langchain_core.documents import Document

        class MockEmbeddings(Embeddings):
            """Return random vectors of a fixed dimension in place of a real model."""

            def __init__(self, dim=1536):
                self.dim = dim

            def embed_documents(self, texts):
                # One random vector per input text.
                return [numpy.random.rand(self.dim).tolist() for _ in texts]

            def embed_query(self, text):
                # A single random vector.
                return numpy.random.rand(self.dim).tolist()

        embeddings = MockEmbeddings(dim=768)
        report["steps"].append("✅ 向量嵌入模型 (Mock) 初始化成功")

        # ------------------------------------------------------------------
        # 3. Full pipeline test on in-memory Qdrant
        # ------------------------------------------------------------------
        pipeline_status, pipeline_step = _run_vector_pipeline_test(
            embeddings, Document
        )
        report["vector_pipeline_test"] = pipeline_status
        if pipeline_step is not None:
            report["steps"].append(pipeline_step)

        # ------------------------------------------------------------------
        # 4. Import test for the other database drivers
        # ------------------------------------------------------------------
        drivers = _probe_drivers()
        report["drivers_status"] = drivers

        # ------------------------------------------------------------------
        # 5. Final verdict
        # ------------------------------------------------------------------
        any_driver_failed = any(
            _FAIL_MARK in status for status in drivers.values()
        )
        if not any_driver_failed and "Success" in report["vector_pipeline_test"]:
            report["status"] = "success"
            report["message"] = "🎉 完美验证!所有库已就绪,且向量计算逻辑运行正常。"
        else:
            report["status"] = "warning"
            report["message"] = "存在潜在问题,请检查详细信息。"

    except Exception as exc:
        # Top-level boundary: turn any unexpected failure into an error report.
        report["status"] = "error"
        report["message"] = f"全局错误: {exc}"

    return report


if __name__ == "__main__":
    report = main()
    print(report)