コンテンツの取得 - Olostep Docs

ページコンテンツを取得

curl --request GET \
  --url https://api.olostep.com/v1/retrieve \
  --header 'Authorization: Bearer <token>'

// Request settings: a GET call authenticated with a Bearer token.
const requestInit = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Call the retrieve endpoint, parse the JSON body, and log it;
// any failure along the chain is reported on stderr.
fetch('https://api.olostep.com/v1/retrieve', requestInit)
  .then((response) => response.json())
  .then((payload) => console.log(payload))
  .catch((error) => console.error(error));

<?php

// Target endpoint and request headers for the retrieve call.
$endpoint = "https://api.olostep.com/v1/retrieve";
$headers = ["Authorization: Bearer <token>"];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the request and capture any transport error before closing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the response body on success, otherwise the cURL error message.
if (!$failure) {
  echo $body;
} else {
  echo "cURL Error #:" . $failure;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the Olostep retrieve endpoint
// and prints the raw response body to standard output. Any failure while
// building the request, sending it, or reading the body is printed and the
// program returns without touching the missing response.
func main() {
	url := "https://api.olostep.com/v1/retrieve"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before deferring res.Body.Close.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint that returns the stored page content.
endpoint = URI("https://api.olostep.com/v1/retrieve")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Authenticated GET request.
get_request = Net::HTTP::Get.new(endpoint)
get_request["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
reply = client.request(get_request)
puts reply.read_body

{
  "html_content": "<string>",
  "markdown_content": "<string>",
  "json_content": "<string>",
  "html_hosted_url": "<string>",
  "markdown_hosted_url": "<string>",
  "json_hosted_url": "<string>",
  "size_exceeded": true
}

GET

/

v1

/

retrieve

ページコンテンツを取得

curl --request GET \
  --url https://api.olostep.com/v1/retrieve \
  --header 'Authorization: Bearer <token>'

// Request settings: a GET call authenticated with a Bearer token.
const requestInit = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Call the retrieve endpoint, parse the JSON body, and log it;
// any failure along the chain is reported on stderr.
fetch('https://api.olostep.com/v1/retrieve', requestInit)
  .then((response) => response.json())
  .then((payload) => console.log(payload))
  .catch((error) => console.error(error));

<?php

// Target endpoint and request headers for the retrieve call.
$endpoint = "https://api.olostep.com/v1/retrieve";
$headers = ["Authorization: Bearer <token>"];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the request and capture any transport error before closing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the response body on success, otherwise the cURL error message.
if (!$failure) {
  echo $body;
} else {
  echo "cURL Error #:" . $failure;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the Olostep retrieve endpoint
// and prints the raw response body to standard output. Any failure while
// building the request, sending it, or reading the body is printed and the
// program returns without touching the missing response.
func main() {
	url := "https://api.olostep.com/v1/retrieve"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before deferring res.Body.Close.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

require 'uri'
require 'net/http'

# Endpoint that returns the stored page content.
endpoint = URI("https://api.olostep.com/v1/retrieve")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Authenticated GET request.
get_request = Net::HTTP::Get.new(endpoint)
get_request["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
reply = client.request(get_request)
puts reply.read_body

{
  "html_content": "<string>",
  "markdown_content": "<string>",
  "json_content": "<string>",
  "html_hosted_url": "<string>",
  "markdown_hosted_url": "<string>",
  "json_hosted_url": "<string>",
  "size_exceeded": true
}

認証

Authorization

string

header

必須

Bearer <token> 形式のBearer認証ヘッダー。<token> はあなたの認証トークンです。

クエリパラメータ

retrieve_id

string

必須

取得するページコンテンツのID。/v1/crawls/{crawl_id}/pages、/v1/scrapes/{scrape_id}、または/v1/batches/{batch_id}/itemsエンドポイントのレスポンスから取得できます。

formats

enum<string>[]

本番環境で特定のフォーマットのみを取得するためのオプションの配列。指定されていない場合、すべてのフォーマットが返されます。

利用可能なオプション:

html,

markdown,

json

レスポンス

ページコンテンツの成功したレスポンス。

html_content

string

要求され、利用可能な場合のページのHTMLコンテンツ。

markdown_content

string

要求され、利用可能な場合のページのMarkdownコンテンツ。

json_content

string

要求され、利用可能な場合のパーサーから返されたページのJSONコンテンツ。

html_hosted_url

string

HTMLのS3バケットURL。7日間有効。

markdown_hosted_url

string

MarkdownのS3バケットURL。7日間有効。

json_hosted_url

string

JSONのS3バケットURL。7日間有効。

size_exceeded

boolean

コンテンツオブジェクトのサイズが6MBの制限を超えているかどうかを示します。trueの場合、ホストされたS3 URLを使用してコンテンツを取得してください。

スケジュールの削除

クレジット情報を取得する