[
{
"id": 1,
"type": "core_function",
"query": "What is the difference between /scrape and /map endpoints?",
"ground_truth": "/map is used to crawl a website and retrieve all URLs, while /scrape is used to extract content from a specific URL.",
"keywords": ["URL", "content", "specific", "retrieve"]
},
{
"id": 2,
"type": "new_feature",
"query": "What is the Deep Research feature?",
"ground_truth": "Deep Research is an alpha feature allowing agents to perform iterative research tasks.",
"keywords": ["alpha", "iterative", "research", "agent"]
},
{
"id": 3,
"type": "integration",
"query": "How can I integrate Firecrawl with ChatGPT?",
"ground_truth": "Firecrawl can be integrated via MCP (Model Context Protocol).",
"keywords": ["MCP", "Model Context Protocol", "setup"]
},
{
"id": 4,
"type": "multilingual_zh",
"query": "如何进行私有化部署 (Self-host)?",
"ground_truth": "你需要使用 Docker Compose 进行部署,文档位于 /self-host/quick-start/docker-compose。",
"keywords": ["Docker", "Compose", "self-host", "deploy"]
},
{
"id": 5,
"type": "api_detail",
"query": "What parameters are available for the /extract endpoint?",
"ground_truth": "The extract endpoint allows defining a schema for structured data extraction.",
"keywords": ["schema", "structured", "prompt"]
},
{
"id": 6,
"type": "numeric",
"query": "How do credits work for the scrape endpoint?",
"ground_truth": "Specific credit usage details are in the /credits endpoint documentation (usually 1 credit per page for basic scrape).",
"keywords": ["credit", "usage", "cost"]
},
{
"id": 7,
"type": "negative_test",
"query": "Does Firecrawl support scraping video content from YouTube?",
"ground_truth": "The documentation does not mention video scraping support.",
"keywords": []
},
{
"id": 8,
"type": "advanced",
"query": "How to use batch scrape?",
"ground_truth": "Use the /batch/scrape endpoint to submit multiple URLs at once.",
"keywords": ["batch", "multiple", "URLs"]
},
{
"id": 9,
"type": "automation",
"query": "Is there an n8n integration guide?",
"ground_truth": "Yes, there is a workflow automation guide for n8n.",
"keywords": ["n8n", "workflow", "automation"]
},
{
"id": 10,
"type": "security",
"query": "Where can I find information about webhook security?",
"ground_truth": "Information is available in the Webhooks Security section.",
"keywords": ["webhook", "security", "signature"]
},
{
"id": 11,
"type": "cross_lingual_trap",
"query": "Explain the crawl features in French.",
"ground_truth": "The system should ideally retrieve the French document (/fr/features/crawl) and answer in French.",
"keywords": ["fonctionnalités", "crawl", "fr"]
},
{
"id": 12,
"type": "api_history",
"query": "How to check historical token usage?",
"ground_truth": "Use the /token-usage-historical endpoint.",
"keywords": ["token", "usage", "historical"]
}
]