{
  "openapi": "3.1.0",
  "info": {
    "title": "CrawlAPI",
    "description": "Web scraping and search API for AI agents and LLM pipelines. Converts any URL into clean markdown or structured data — JavaScript rendered, boilerplate removed, ready for LLM context windows.",
    "version": "1.0.0",
    "contact": {
      "name": "CrawlAPI",
      "url": "https://crawlapi.net"
    },
    "license": {
      "name": "Commercial",
      "url": "https://crawlapi.net"
    }
  },
  "servers": [
    {
      "url": "https://crawlapi.net",
      "description": "Production"
    }
  ],
  "security": [
    {
      "ApiKeyAuth": []
    }
  ],
  "paths": {
    "/v1/scrape": {
      "post": {
        "operationId": "scrapeUrl",
        "summary": "Scrape a URL",
        "description": "Scrapes a single URL with full JavaScript rendering and returns the page in one or more requested formats: clean markdown, rendered HTML, plain text, or structured data. Ideal for feeding web content into LLM prompts.",
        "tags": ["Scraping"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ScrapeRequest"
              },
              "examples": {
                "basic": {
                  "summary": "Basic markdown scrape",
                  "value": {
                    "url": "https://example.com",
                    "formats": ["markdown"]
                  }
                },
                "with_wait": {
                  "summary": "Wait for dynamic content",
                  "value": {
                    "url": "https://example.com",
                    "formats": ["markdown", "structured"],
                    "waitFor": 2000
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful scrape",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "400": {
            "description": "Bad request — missing or invalid URL",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "401": {
            "description": "Unauthorized — invalid or missing API key"
          },
          "429": {
            "description": "Rate limit exceeded"
          },
          "500": {
            "description": "Scrape failed — page unreachable or render error"
          }
        }
      }
    },
    "/v1/batch": {
      "post": {
        "operationId": "batchScrape",
        "summary": "Scrape multiple URLs",
        "description": "Scrapes up to 10 URLs in parallel. Returns results in the same order as the input array. Failed URLs are included with an error field rather than failing the whole request.",
        "tags": ["Scraping"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/BatchRequest"
              },
              "examples": {
                "basic": {
                  "summary": "Batch scrape 3 URLs",
                  "value": {
                    "urls": [
                      "https://example.com",
                      "https://example.org",
                      "https://example.net"
                    ],
                    "formats": ["markdown"]
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Batch results (individual URLs may have succeeded or failed)",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/BatchResponse"
                }
              }
            }
          },
          "400": {
            "description": "Bad request — missing urls array or too many URLs (max 10)"
          },
          "401": {
            "description": "Unauthorized — invalid or missing API key"
          },
          "429": {
            "description": "Rate limit exceeded"
          }
        }
      }
    },
    "/v1/search": {
      "post": {
        "operationId": "searchAndScrape",
        "summary": "Search the web and scrape results",
        "description": "Runs a web search query and returns scraped content for the top `num` results (1–10, default 5). Combines search and scrape in a single API call — ideal for autonomous research agents.",
        "tags": ["Search"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/SearchRequest"
              },
              "examples": {
                "basic": {
                  "summary": "Search and get top 5 results",
                  "value": {
                    "query": "best web scraping libraries for Python 2025",
                    "num": 5,
                    "formats": ["markdown"]
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Search results with scraped content",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/SearchResponse"
                }
              }
            }
          },
          "400": {
            "description": "Bad request — missing query"
          },
          "401": {
            "description": "Unauthorized — invalid or missing API key"
          },
          "429": {
            "description": "Rate limit exceeded"
          }
        }
      }
    },
    "/health": {
      "get": {
        "operationId": "healthCheck",
        "summary": "Health check",
        "description": "Returns service status. No authentication required.",
        "tags": ["System"],
        "security": [],
        "responses": {
          "200": {
            "description": "Service is healthy",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": { "type": "string", "example": "ok" },
                    "service": { "type": "string", "example": "CrawlAPI" }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "ApiKeyAuth": {
        "type": "apiKey",
        "in": "header",
        "name": "X-RapidAPI-Key",
        "description": "Your RapidAPI key. Get one at https://rapidapi.com/crawlapi/api/crawlapi"
      }
    },
    "schemas": {
      "ScrapeFormat": {
        "type": "string",
        "enum": ["markdown", "html", "text", "structured"],
        "description": "Output format. Use `markdown` for LLM context, `structured` for data extraction."
      },
      "ScrapeRequest": {
        "type": "object",
        "required": ["url"],
        "properties": {
          "url": {
            "type": "string",
            "format": "uri",
            "description": "The URL to scrape.",
            "example": "https://example.com"
          },
          "formats": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ScrapeFormat" },
            "default": ["markdown"],
            "description": "One or more output formats to return."
          },
          "waitFor": {
            "type": "integer",
            "description": "Milliseconds to wait after page load before capturing. Useful for JavaScript-heavy pages.",
            "minimum": 0,
            "maximum": 10000,
            "example": 1000
          },
          "timeout": {
            "type": "integer",
            "description": "Maximum time in milliseconds to wait for the page. Capped at 60000.",
            "minimum": 1000,
            "maximum": 60000,
            "default": 30000
          }
        }
      },
      "PageMetadata": {
        "type": "object",
        "properties": {
          "title": { "type": "string" },
          "description": { "type": "string" },
          "url": { "type": "string" },
          "statusCode": { "type": "integer" }
        }
      },
      "ScrapeData": {
        "type": "object",
        "properties": {
          "markdown": { "type": "string", "description": "Clean markdown content" },
          "html": { "type": "string", "description": "Rendered HTML" },
          "text": { "type": "string", "description": "Plain text content" },
          "structured": {
            "type": "object",
            "description": "Structured data (title, headings, links, images, metadata)"
          },
          "metadata": { "$ref": "#/components/schemas/PageMetadata" }
        }
      },
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": { "type": "boolean" },
          "data": { "$ref": "#/components/schemas/ScrapeData" }
        }
      },
      "BatchRequest": {
        "type": "object",
        "required": ["urls"],
        "properties": {
          "urls": {
            "type": "array",
            "items": { "type": "string", "format": "uri" },
            "minItems": 1,
            "maxItems": 10,
            "description": "Array of URLs to scrape (at least 1, at most 10)."
          },
          "formats": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ScrapeFormat" },
            "default": ["markdown"]
          },
          "timeout": {
            "type": "integer",
            "minimum": 1000,
            "maximum": 60000,
            "default": 30000
          }
        }
      },
      "BatchResult": {
        "type": "object",
        "properties": {
          "url": { "type": "string" },
          "success": { "type": "boolean" },
          "data": { "$ref": "#/components/schemas/ScrapeData" },
          "error": { "type": "string" }
        }
      },
      "BatchResponse": {
        "type": "object",
        "properties": {
          "success": { "type": "boolean" },
          "data": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/BatchResult" }
          }
        }
      },
      "SearchRequest": {
        "type": "object",
        "required": ["query"],
        "properties": {
          "query": {
            "type": "string",
            "description": "Web search query",
            "example": "LangChain web scraping tools 2025"
          },
          "num": {
            "type": "integer",
            "minimum": 1,
            "maximum": 10,
            "default": 5,
            "description": "Number of results to return and scrape."
          },
          "formats": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ScrapeFormat" },
            "default": ["markdown"]
          },
          "timeout": {
            "type": "integer",
            "minimum": 1000,
            "maximum": 60000,
            "default": 30000
          }
        }
      },
      "SearchResult": {
        "type": "object",
        "properties": {
          "url": { "type": "string" },
          "title": { "type": "string" },
          "snippet": { "type": "string" },
          "success": { "type": "boolean" },
          "data": { "$ref": "#/components/schemas/ScrapeData" },
          "error": { "type": "string" }
        }
      },
      "SearchResponse": {
        "type": "object",
        "properties": {
          "success": { "type": "boolean" },
          "data": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/SearchResult" }
          }
        }
      },
      "ErrorResponse": {
        "type": "object",
        "properties": {
          "success": { "type": "boolean", "example": false },
          "error": { "type": "string" }
        }
      }
    }
  },
  "tags": [
    {
      "name": "Scraping",
      "description": "Single and batch URL scraping with JS rendering"
    },
    {
      "name": "Search",
      "description": "Web search with automatic scraping of results"
    },
    {
      "name": "System",
      "description": "Health and status endpoints"
    }
  ],
  "externalDocs": {
    "description": "CrawlAPI on RapidAPI",
    "url": "https://rapidapi.com/crawlapi/api/crawlapi"
  }
}
