Inhoud Ophalen - Olostep Docs

Haal pagina-inhoud op

# Retrieve page content with an authenticated GET request.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  'https://api.olostep.com/v1/retrieve'

// Request settings: a plain GET that carries the bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Call the retrieve endpoint, parse the body as JSON and log it;
// any failure along the chain is logged to stderr instead.
fetch('https://api.olostep.com/v1/retrieve', options)
  .then((response) => response.json())
  .then((payload) => console.log(payload))
  .catch((error) => console.error(error));

<?php

// Create the cURL handle and configure an authenticated GET request
// to the retrieve endpoint, one option at a time.
$curl = curl_init();

curl_setopt($curl, CURLOPT_URL, "https://api.olostep.com/v1/retrieve");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($curl, CURLOPT_HTTPHEADER, [
  "Authorization: Bearer <token>"
]);

// Execute the request and capture any transport error before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error when there is one, otherwise the raw response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the Olostep retrieve endpoint
// and prints the raw response body to standard output. Any failure while
// building the request, sending it, or reading the body is printed and the
// program returns without touching a nil response.
//
// NOTE: this page documents `retrieve_id` as a required query parameter; the
// sample URL does not include it, so add it before using this against the API.
func main() {
	url := "https://api.olostep.com/v1/retrieve"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; deferring res.Body.Close() would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint of the retrieve API.
endpoint = URI("https://api.olostep.com/v1/retrieve")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Authenticated GET request for the endpoint.
get = Net::HTTP::Get.new(endpoint)
get["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
puts client.request(get).read_body

{
  "html_content": "<string>",
  "markdown_content": "<string>",
  "json_content": "<string>",
  "html_hosted_url": "<string>",
  "markdown_hosted_url": "<string>",
  "json_hosted_url": "<string>",
  "size_exceeded": true
}

GET

/

v1

/

retrieve

Haal pagina-inhoud op

# Retrieve page content with an authenticated GET request.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  'https://api.olostep.com/v1/retrieve'

// Request settings: a plain GET that carries the bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Call the retrieve endpoint, parse the body as JSON and log it;
// any failure along the chain is logged to stderr instead.
fetch('https://api.olostep.com/v1/retrieve', options)
  .then((response) => response.json())
  .then((payload) => console.log(payload))
  .catch((error) => console.error(error));

<?php

// Create the cURL handle and configure an authenticated GET request
// to the retrieve endpoint, one option at a time.
$curl = curl_init();

curl_setopt($curl, CURLOPT_URL, "https://api.olostep.com/v1/retrieve");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($curl, CURLOPT_HTTPHEADER, [
  "Authorization: Bearer <token>"
]);

// Execute the request and capture any transport error before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error when there is one, otherwise the raw response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the Olostep retrieve endpoint
// and prints the raw response body to standard output. Any failure while
// building the request, sending it, or reading the body is printed and the
// program returns without touching a nil response.
//
// NOTE: this page documents `retrieve_id` as a required query parameter; the
// sample URL does not include it, so add it before using this against the API.
func main() {
	url := "https://api.olostep.com/v1/retrieve"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; deferring res.Body.Close() would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint of the retrieve API.
endpoint = URI("https://api.olostep.com/v1/retrieve")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Authenticated GET request for the endpoint.
get = Net::HTTP::Get.new(endpoint)
get["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
puts client.request(get).read_body

{
  "html_content": "<string>",
  "markdown_content": "<string>",
  "json_content": "<string>",
  "html_hosted_url": "<string>",
  "markdown_hosted_url": "<string>",
  "json_hosted_url": "<string>",
  "size_exceeded": true
}

Autorisaties

Authorization

string

header

vereist

Bearer-authenticatieheader in de vorm Bearer <token>, waarbij <token> jouw auth-token is.

Queryparameters

retrieve_id

string

vereist

De ID van de pagina-inhoud om op te halen. Beschikbaar in de respons van /v1/crawls/{crawl_id}/pages, /v1/scrapes/{scrape_id} of /v1/batches/{batch_id}/items eindpunten

formats

enum<string>[]

Optionele array om alleen specifieke formaten op te halen. Als deze niet wordt opgegeven, worden alle formaten geretourneerd.

Beschikbare opties:

html,

markdown,

json

Respons

Succesvolle respons met pagina-inhoud.

html_content

string

HTML-inhoud van de pagina, indien aangevraagd en beschikbaar.

markdown_content

string

Markdown-inhoud van de pagina, indien aangevraagd en beschikbaar.

json_content

string

JSON-inhoud van de pagina geretourneerd door parsers, indien aangevraagd en beschikbaar.

html_hosted_url

string

S3 bucket URL van html. Verloopt over 7 dagen.

markdown_hosted_url

string

S3 bucket URL van markdown. Verloopt over 7 dagen.

json_hosted_url

string

S3 bucket URL van json. Verloopt over 7 dagen.

size_exceeded

boolean

Geeft aan of de grootte van de inhoudsobjecten de limiet van 6 MB overschrijdt. Als dit waar is, gebruik dan de gehoste S3-URL's om de inhoud op te halen.

Verwijder Schema Krijg kredietinformatie