> ## Documentation Index
> Fetch the complete documentation index at: https://docs.olostep.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Create Scrape

> [Scrape](https://docs.olostep.com/features/scrapes) a URL with the provided configuration and get its content.



## OpenAPI

````yaml openapi/scrapes.json POST /v1/scrapes
openapi: 3.0.3
info:
  title: Scrapes API
  version: 1.0.0
servers:
  - url: https://api.olostep.com
security: []
paths:
  /v1/scrapes:
    post:
      summary: Initiate a web page scrape
      description: >-
        This endpoint allows users to start a web page scrape with various
        configurations.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                url_to_scrape:
                  type: string
                  format: uri
                  description: The URL to start scraping from.
                wait_before_scraping:
                  type: integer
                  # A negative delay is meaningless; this mirrors the `minimum: 0`
                  # bound declared on the `wait` action's `milliseconds` field.
                  minimum: 0
                  description: Time to wait in milliseconds before starting the scraping.
                formats:
                  type: array
                  items:
                    type: string
                    enum:
                      - html
                      - markdown
                      - text
                      - json
                      - raw_pdf
                      - screenshot
                  description: Formats in which you want the content.
                remove_css_selectors:
                  type: string
                  enum:
                    - default
                    - none
                    - array
                  description: >-
                    Option to remove certain CSS selectors from the content.
                    Optionally, you can also pass a JSON-stringified array of
                    the specific selectors you want to remove. When this option
                    is set to `default`, the following CSS selectors are
                    removed: `nav`, `footer`, `script`, `style`, `noscript`,
                    `svg`, `[role=alert]`, `[role=banner]`, `[role=dialog]`,
                    `[role=alertdialog]`, `[role=region][aria-label*=skip i]`,
                    and `[aria-modal=true]`.
                # List of page interactions. Each item must match exactly one of
                # the `oneOf` variants below; the item's `type` field names the
                # variant (wait, click, fill_input, scroll).
                actions:
                  type: array
                  items:
                    type: object
                    # NOTE(review): OpenAPI 3.0 states that inline schemas are
                    # not considered when resolving a discriminator. The variants
                    # below are inline (not `$ref`s), so tooling may ignore this
                    # discriminator — confirm against the consuming renderer.
                    discriminator:
                      propertyName: type
                    oneOf:
                      # Variant: pause for a fixed, non-negative duration.
                      - type: object
                        title: Wait
                        required:
                          - type
                          - milliseconds
                        properties:
                          type:
                            type: string
                            enum:
                              - wait
                            description: Wait for a specified amount of milliseconds
                          milliseconds:
                            type: integer
                            minimum: 0
                            description: Time to wait in milliseconds
                      # Variant: click the element matched by a CSS selector.
                      - type: object
                        title: Click
                        required:
                          - type
                          - selector
                        properties:
                          type:
                            type: string
                            enum:
                              - click
                            description: Click on an element
                          selector:
                            type: string
                            description: CSS selector for the element to click
                      # Variant: type a string value into the matched input.
                      - type: object
                        title: Fill Input
                        required:
                          - type
                          - selector
                          - value
                        properties:
                          type:
                            type: string
                            enum:
                              - fill_input
                            description: Fill an input element with a value
                          selector:
                            type: string
                            description: CSS selector for the input element
                          value:
                            type: string
                            description: Text to enter into the input
                      # Variant: scroll in one of four directions by a pixel amount.
                      - type: object
                        title: Scroll
                        required:
                          - type
                          - direction
                          - amount
                        properties:
                          type:
                            type: string
                            enum:
                              - scroll
                            description: Scroll the page
                          direction:
                            type: string
                            enum:
                              - up
                              - down
                              - left
                              - right
                            description: Direction to scroll
                          amount:
                            type: number
                            description: Amount to scroll in pixels
                  description: Actions to perform on the page before getting the content.
                country:
                  type: string
                  description: >-
                    Residential country to load the request from.


                    Supported values are:

                    - US (United States)

                    - CA (Canada)

                    - IT (Italy)

                    - IN (India)

                    - GB (United Kingdom)

                    - JP (Japan)

                    - MX (Mexico)

                    - AU (Australia)

                    - ID (Indonesia)

                    - UA (UAE)

                    - RU (Russia)

                    - RANDOM


                    Some operations, like scraping Google Search and Google
                    News, support all countries.
                transformer:
                  type: string
                  enum:
                    - postlight
                    - none
                  description: >-
                    Specify the HTML transformer to use, if any. Postlight's
                    Mercury Parser library is used to remove ads and other
                    unwanted content from the scraped content.
                remove_images:
                  type: boolean
                  description: >-
                    Option to remove images from the scraped content. Defaults
                    to false.
                  default: false
                remove_class_names:
                  type: array
                  items:
                    type: string
                  description: List of class names to remove from the content.
                parser:
                  type: object
                  properties:
                    id:
                      type: string
                      description: ID of the parser to use.
                  required:
                    - id
                  description: >-
                    When defining json as a format, you can use this parameter
                    to specify the parser to use. Parsers are useful to extract
                    structured content from web pages. Olostep has a few parsers
                    built in for most common web pages, and you can also create
                    your own parsers.
                llm_extract:
                  type: object
                  properties:
                    schema:
                      type: object
                      description: Schema for the LLM extraction.
                links_on_page:
                  type: object
                  properties:
                    absolute_links:
                      type: boolean
                      default: true
                      description: >-
                        When true, it returns complete URLs (e.g.,
                        'https://example.com/page') instead of relative paths
                        (e.g., '/page')
                    query_to_order_links_by:
                      type: string
                      description: >-
                        Orders the returned links by their similarity to the
                        provided query text, prioritizing the most relevant
                        matches first
                    include_links:
                      type: array
                      items:
                        type: string
                      description: >-
                        Filter extracted links using glob patterns with
                        `include_links`. Use patterns like "*.pdf" to match file
                        extensions, "/blog/*" for specific paths, or full URLs
                        like "https://example.com/*". Supports wildcards (*),
                        character classes ([a-z]), and alternation
                        ({pattern1,pattern2}).
                    exclude_links:
                      type: array
                      items:
                        type: string
                      description: >-
                        Filter extracted links using glob patterns with
                        `exclude_links`. Use patterns like "*.pdf" to match file
                        extensions, "/blog/*" for specific paths, or full URLs
                        like "https://example.com/*". Supports wildcards (*),
                        character classes ([a-z]), and alternation
                        ({pattern1,pattern2}).
                  description: >-
                    With this option, you can get all the links present on the
                    page you scrape.
                screen_size:
                  type: object
                  properties:
                    screen_type:
                      type: string
                      enum:
                        - default
                        - mobile
                        - desktop
                      description: >-
                        Type of screen. Desktop uses 1920x1080 pixels, mobile
                        uses 414x896 pixels, and default uses 768x1024 pixels.
                    screen_width:
                      type: integer
                      description: >-
                        Width of the screen in pixels. Desktop: 1920px, mobile:
                        414px, default: 768px.
                    screen_height:
                      type: integer
                      description: >-
                        Height of the screen in pixels. Desktop: 1080px, mobile:
                        896px, default: 1024px.
                  description: >-
                    Configuration for screen size. Preset dimensions are
                    available through screen_type: desktop (1920x1080), mobile
                    (414x896), or default (768x1024).
                screenshot:
                  type: object
                  properties:
                    full_page:
                      type: boolean
                      description: >-
                        If true, a full-page screenshot is taken after
                        scrolling to the bottom of the page.
                metadata:
                  type: object
                  description: User-defined metadata. Not supported yet.
              required:
                - url_to_scrape
      responses:
        '200':
          description: Successful response with the scrape initiation details.
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                    description: Scrape ID
                  object:
                    type: string
                    description: The kind of object. "scrape" for this endpoint.
                  created:
                    type: number
                    description: Unix epoch timestamp of when the scrape was created.
                  metadata:
                    type: object
                    description: User-defined metadata.
                  url_to_scrape:
                    type: string
                    description: The URL that was scraped.
                  result:
                    type: object
                    properties:
                      html_content:
                        type: string
                      markdown_content:
                        type: string
                      text_content:
                        type: string
                      json_content:
                        type: string
                        description: Content from parser
                      screenshot_hosted_url:
                        type: string
                      html_hosted_url:
                        type: string
                      markdown_hosted_url:
                        type: string
                      text_hosted_url:
                        type: string
                      links_on_page:
                        type: array
                        items:
                          type: string
                      page_metadata:
                        type: object
                        properties:
                          status_code:
                            type: integer
                          title:
                            type: string
                  credits_consumed:
                    type: integer
                    nullable: true
                    description: >-
                      Number of credits consumed by this request. Populated
                      after execution completes. Credits are the source of truth
                      for billing.
                  cost_usd:
                    type: number
                    nullable: true
                    description: >-
                      Estimated cost in USD for this request. Populated after
                      execution completes. Calculated from credits consumed and
                      your plan rate — 99% accurate, but credits_consumed is the
                      authoritative value.
        '400':
          description: >-
            The request cannot be fulfilled due to a problem with the target
            URL. Common codes: `dns_resolution_failed` (domain does not exist),
            `invalid_url` (malformed URL).
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  object:
                    type: string
                    enum:
                      - error
                  created:
                    type: integer
                  metadata:
                    type: object
                  error:
                    type: object
                    properties:
                      type:
                        type: string
                        enum:
                          - invalid_request_error
                      code:
                        type: string
                        example: dns_resolution_failed
                      message:
                        type: string
              example:
                id: error_x2nmu5bqn6
                object: error
                created: 1777923912
                metadata: {}
                error:
                  type: invalid_request_error
                  code: dns_resolution_failed
                  message: The URL contains a typo, or the domain does not exist.
        '402':
          description: Payment required — invalid or exhausted API key.
        '404':
          description: The requested scrape ID was not found.
        '500':
          description: Internal server error.
        '502':
          description: >-
            The target website has a TLS/SSL configuration problem. `error.code`
            is always `tls_error`; `error.detail` carries the specific low-level
            SSL error code (e.g. `err_ssl_tlsv1_alert_internal_error`,
            `cert_verification_failed`).
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  object:
                    type: string
                    enum:
                      - error
                  created:
                    type: integer
                  url:
                    type: string
                  metadata:
                    type: object
                  error:
                    type: object
                    properties:
                      type:
                        type: string
                        enum:
                          - invalid_request_error
                      code:
                        type: string
                        enum:
                          - tls_error
                      detail:
                        type: string
                        description: Low-level SSL error code for diagnostics.
                      message:
                        type: string
              example:
                id: error_ogeb6rik8c
                object: error
                created: 1777923969
                url: https://example.com
                metadata: {}
                error:
                  type: invalid_request_error
                  code: tls_error
                  detail: err_ssl_tlsv1_alert_internal_error
                  message: >-
                    The website closed or rejected the TLS handshake. The server
                    may be misconfigured or use an unsupported SSL/TLS version.
        '504':
          description: >-
            The scrape did not complete within the wait budget (~55 seconds).
            The target page may be slow, bot-protected, or temporarily
            unavailable. Safe to retry.
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  object:
                    type: string
                    enum:
                      - error
                  created:
                    type: integer
                  url:
                    type: string
                  metadata:
                    type: object
                  error:
                    type: object
                    properties:
                      type:
                        type: string
                        enum:
                          - request_timeout
                      code:
                        type: string
                        enum:
                          - scrape_poll_timeout
                      message:
                        type: string
              example:
                id: error_qat3d1amjt
                object: error
                created: 1777923969
                url: https://example.com
                metadata: {}
                error:
                  type: request_timeout
                  code: scrape_poll_timeout
                  message: >-
                    Request timed out while waiting for scrape result. The page
                    may be slow, blocked for our fetchers, or temporarily
                    unavailable.
      security:
        - Authorization: []
components:
  securitySchemes:
    Authorization:
      type: http
      scheme: bearer
      description: >-
        Bearer authentication header of the form Bearer <token>, where <token>
        is your auth token.

````