[
  {
    "id": 1,
    "type": "core_function",
    "query": "What is the difference between /scrape and /map endpoints?",
    "ground_truth": "/map is used to crawl a website and retrieve all URLs, while /scrape is used to extract content from a specific URL.",
    "keywords": ["URL", "content", "specific", "retrieve"]
  },
  {
    "id": 2,
    "type": "new_feature",
    "query": "What is the Deep Research feature?",
    "ground_truth": "Deep Research is an alpha feature allowing agents to perform iterative research tasks.",
    "keywords": ["alpha", "iterative", "research", "agent"]
  },
  {
    "id": 3,
    "type": "integration",
    "query": "How can I integrate Firecrawl with ChatGPT?",
    "ground_truth": "Firecrawl can be integrated via MCP (Model Context Protocol).",
    "keywords": ["MCP", "Model Context Protocol", "setup"]
  },
  {
    "id": 4,
    "type": "multilingual_zh",
    "query": "如何进行私有化部署 (Self-host)?",
    "ground_truth": "你需要使用 Docker Compose 进行部署,文档位于 /self-host/quick-start/docker-compose。",
    "keywords": ["Docker", "Compose", "self-host", "deploy"]
  },
  {
    "id": 5,
    "type": "api_detail",
    "query": "What parameters are available for the /extract endpoint?",
    "ground_truth": "The extract endpoint allows defining a schema for structured data extraction.",
    "keywords": ["schema", "structured", "prompt"]
  },
  {
    "id": 6,
    "type": "numeric",
    "query": "How do credits work for the scrape endpoint?",
    "ground_truth": "Specific credit usage details are in the /credits endpoint documentation (usually 1 credit per page for basic scrape).",
    "keywords": ["credit", "usage", "cost"]
  },
  {
    "id": 7,
    "type": "negative_test",
    "query": "Does Firecrawl support scraping video content from YouTube?",
    "ground_truth": "The documentation does not mention video scraping support.",
    "keywords": []
  },
  {
    "id": 8,
    "type": "advanced",
    "query": "How to use batch scrape?",
    "ground_truth": "Use the /batch/scrape endpoint to submit multiple URLs at once.",
    "keywords": ["batch", "multiple", "URLs"]
  },
  {
    "id": 9,
    "type": "automation",
    "query": "Is there an n8n integration guide?",
    "ground_truth": "Yes, there is a workflow automation guide for n8n.",
    "keywords": ["n8n", "workflow", "automation"]
  },
  {
    "id": 10,
    "type": "security",
    "query": "Where can I find information about webhook security?",
    "ground_truth": "Information is available in the Webhooks Security section.",
    "keywords": ["webhook", "security", "signature"]
  },
  {
    "id": 11,
    "type": "cross_lingual_trap",
    "query": "Explain the crawl features in French.",
    "ground_truth": "The system should ideally retrieve the French document (/fr/features/crawl) and answer in French.",
    "keywords": ["fonctionnalités", "crawl", "fr"]
  },
  {
    "id": 12,
    "type": "api_history",
    "query": "How to check historical token usage?",
    "ground_truth": "Use the /token-usage-historical endpoint.",
    "keywords": ["token", "usage", "historical"]
  }
]