Skip to content

Ingest Raw Articles in Near Real Time

The Raw Articles endpoint (/v1/news/raw) returns articles as they are discovered, before HTML parsing and NLP enrichment. Use it when you want the earliest possible access to incoming articles, or when you plan to run your own parsing and analysis instead of the enriched /v1/news/everything feed.

Only the last ~24 hours are available

This is a fast-churning staging feed — rows expire within roughly one day. Poll often enough that you never fall behind, and use /v1/news/everything for the full historical archive.

Step 1: Pull the newest raw articles

Sort by id descending (the default) to get the most recently discovered rows first. Keep per_page modest while you tune your loop (max 250).

bash
# Newest raw articles first: --get moves the --data pairs into the URL query string.
curl --get "https://api.apitube.io/v1/news/raw" \
  --data "sort.by=id" \
  --data "sort.order=desc" \
  --data "per_page=50" \
  --data "api_key=YOUR_API_KEY"
python
import requests

# Fetch the 50 most recently discovered raw articles (highest id first).
response = requests.get(
    "https://api.apitube.io/v1/news/raw",
    params={
        "sort.by": "id",
        "sort.order": "desc",
        "per_page": 50,
        "api_key": "YOUR_API_KEY",
    },
    # requests applies no timeout by default; without one a stalled
    # connection would block this script indefinitely.
    timeout=30,
)
# Raise on HTTP 4xx/5xx instead of printing an error payload as if it were data.
response.raise_for_status()
print(response.json())
javascript
// Request the 50 most recently discovered raw articles (highest id first).
const params = new URLSearchParams({ "sort.by": "id", "sort.order": "desc", "per_page": "50", "api_key": "YOUR_API_KEY" });
const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);

// fetch() only rejects on network failure; an HTTP error status still
// resolves, so it has to be checked explicitly before reading the body.
if (!response.ok) {
  throw new Error(`Raw articles request failed: ${response.status} ${response.statusText}`);
}

const data = await response.json();
console.log(data);
php
// Request the 50 most recently discovered raw articles (highest id first).
$query = http_build_query(["sort.by" => "id", "sort.order" => "desc", "per_page" => 50, "api_key" => "YOUR_API_KEY"]);
$response = file_get_contents("https://api.apitube.io/v1/news/raw?$query");

// file_get_contents() returns false on failure; stop here rather than
// feeding false into json_decode().
if ($response === false) {
    throw new RuntimeException("Request to /v1/news/raw failed");
}

$data = json_decode($response, true);

// json_decode() returns null both for the JSON literal null and for invalid
// input, so json_last_error() is what distinguishes a parse failure.
if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
    throw new RuntimeException("Invalid JSON in response: " . json_last_error_msg());
}

print_r($data);
go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
)

// main requests the 50 most recently discovered raw articles (highest id
// first) and prints the decoded JSON response.
//
// Every step that can fail is checked: in particular a failed http.Get
// returns a nil response, so deferring resp.Body.Close() before checking the
// error would panic with a nil pointer dereference.
func main() {
	u, err := url.Parse("https://api.apitube.io/v1/news/raw")
	if err != nil {
		panic(fmt.Errorf("parse endpoint URL: %w", err))
	}
	q := u.Query()
	q.Set("sort.by", "id")
	q.Set("sort.order", "desc")
	q.Set("per_page", "50")
	q.Set("api_key", "YOUR_API_KEY")
	u.RawQuery = q.Encode()

	resp, err := http.Get(u.String())
	if err != nil {
		panic(fmt.Errorf("request raw articles: %w", err))
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(fmt.Errorf("read response body: %w", err))
	}

	// http.Get reports only transport errors; a non-2xx status arrives as a
	// normal response and must be rejected explicitly.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		panic(fmt.Errorf("unexpected status %s: %s", resp.Status, body))
	}

	var data map[string]any
	if err := json.Unmarshal(body, &data); err != nil {
		panic(fmt.Errorf("decode response JSON: %w", err))
	}
	fmt.Println(data)
}
java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Minimal example: sends one GET request for the newest raw articles and
 * prints the response body as text.
 */
public class Example {
    /** Full request URL, including sort order, page size and API key. */
    private static final String ENDPOINT =
        "https://api.apitube.io/v1/news/raw?sort.by=id&sort.order=desc&per_page=50&api_key=YOUR_API_KEY";

    public static void main(String[] args) throws Exception {
        HttpRequest request = HttpRequest.newBuilder(URI.create(ENDPOINT)).GET().build();

        // Read the whole body as a String and echo it unchanged.
        HttpResponse<String> response =
            HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
text
Write a script in your preferred language that calls the APITube News API:

GET https://api.apitube.io/v1/news/raw?sort.by=id&sort.order=desc&per_page=50

Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters

Each row carries only what the source feed provided plus resolved publisher details — there is no language, category, topic, entity, industry, or sentiment data at this stage:

json
{
  "id": 84512377,
  "title": "AI advances reshape the chip industry in 2026",
  "href": "https://example.com/ai-advances-2026",
  "created_at": "2026-05-27 08:00:00",
  "body": "The field of artificial intelligence continues to move quickly...",
  "body_html": "<p>The field of artificial intelligence continues to move quickly...</p>",
  "author": "Jane Doe",
  "keywords": ["technology", "ai", "semiconductors"],
  "source": { "id": 1024, "domain": "example.com", "bias": "center" }
}

Step 2: Poll without missing or re-processing rows

id is monotonically increasing, so track the highest id you have already processed and stop paging once you reach it. On the next tick, start again from page 1. This is more reliable than time-based windows because it survives clock skew and out-of-order created_at values.

python
import time
import requests

API_URL = "https://api.apitube.io/v1/news/raw"
API_KEY = "YOUR_API_KEY"
PAGE_SIZE = 100
POLL_INTERVAL_SECONDS = 60
REQUEST_TIMEOUT_SECONDS = 30


def fetch_page(page):
    """Fetch one page of raw articles (highest id first) and return the decoded JSON body.

    Raises requests.RequestException on network failure or an HTTP 4xx/5xx
    status, and ValueError if the body is not valid JSON.
    """
    resp = requests.get(
        API_URL,
        params={"sort.by": "id", "sort.order": "desc", "per_page": PAGE_SIZE, "page": page, "api_key": API_KEY},
        # requests has no default timeout; a stalled connection would otherwise hang the poller.
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    resp.raise_for_status()
    return resp.json()


def collect_new_rows(seen_max_id, get_page=fetch_page):
    """Page through the feed and return every row whose id is above ``seen_max_id``.

    Args:
        seen_max_id: Highest id already processed; rows at or below it are skipped.
        get_page: Callable taking a 1-based page number and returning the decoded
            response dict. Injectable so the paging logic can run without network access.

    Returns:
        The new rows in the order they were encountered, each id at most once.
    """
    batch = []
    batch_ids = set()
    page = 1

    while True:
        resp = get_page(page)
        rows = resp.get("results", [])
        fresh = [r for r in rows if r["id"] > seen_max_id]

        # Articles discovered while we are paging push older rows onto the next
        # page, so the same row can be served twice; keep only its first copy.
        for row in fresh:
            if row["id"] not in batch_ids:
                batch_ids.add(row["id"])
                batch.append(row)

        # stop paging once we hit already-seen rows or run out of pages
        if len(fresh) < len(rows) or not resp.get("has_next_pages"):
            break

        page += 1

    return batch


def main():
    """Poll forever, advancing the id cursor only after a fully successful pass."""
    seen_max_id = 0

    while True:
        try:
            batch = collect_new_rows(seen_max_id)
        except (requests.RequestException, ValueError) as exc:
            # Drop the partial pass and leave the cursor untouched, so the next
            # tick re-fetches everything this one would have returned.
            print(f"poll failed, will retry: {exc}")
            batch = []

        if batch:
            seen_max_id = max(r["id"] for r in batch)
            # hand `batch` to your own parser / queue here
            print(f"ingested {len(batch)} new articles, cursor={seen_max_id}")

        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()
javascript
// Highest article id handed off so far; rows at or below it are skipped.
let seenMaxId = 0;

/**
 * Page through the raw feed (highest id first), collect every row whose id is
 * above `seenMaxId`, then advance the cursor.
 *
 * Throws if a request fails or returns an HTTP error status. In that case the
 * cursor is left untouched, so the next call re-fetches the same rows.
 */
async function poll() {
  let page = 1;
  const batch = [];
  const batchIds = new Set();
  let newestId = seenMaxId;

  while (true) {
    const params = new URLSearchParams({ "sort.by": "id", "sort.order": "desc", "per_page": "100", "page": String(page), "api_key": "YOUR_API_KEY" });
    const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);

    // fetch() resolves on HTTP error statuses, so reject them explicitly.
    if (!response.ok) {
      throw new Error(`raw feed request failed: ${response.status} ${response.statusText}`);
    }
    const resp = await response.json();

    const rows = resp.results ?? [];
    const fresh = rows.filter(r => r.id > seenMaxId);

    // Articles discovered while we are paging push older rows onto the next
    // page, so the same row can be served twice; keep only its first copy.
    for (const row of fresh) {
      if (batchIds.has(row.id)) continue;
      batchIds.add(row.id);
      batch.push(row);
      // Track the maximum incrementally: spreading a very large batch into
      // Math.max() can exceed the engine's argument limit.
      if (row.id > newestId) newestId = row.id;
    }

    // stop paging once we hit already-seen rows or run out of pages
    if (fresh.length < rows.length || !resp.has_next_pages) break;
    page += 1;
  }

  if (batch.length) {
    seenMaxId = newestId;
    // hand `batch` to your own parser / queue here
    console.log(`ingested ${batch.length} new articles, cursor=${seenMaxId}`);
  }
}

/**
 * Run poll() forever with a 60-second pause between passes.
 *
 * A sequential loop is used instead of setInterval so that a slow pass can
 * never overlap the next one (two concurrent passes would race on
 * `seenMaxId`), and so that one failed pass is logged rather than becoming an
 * unhandled rejection. The first pass starts immediately.
 */
async function run() {
  while (true) {
    try {
      await poll();
    } catch (err) {
      console.error("poll failed, will retry:", err);
    }
    await new Promise(resolve => setTimeout(resolve, 60_000));
  }
}

run();
text
Write a script in your preferred language that calls the APITube News API:

GET https://api.apitube.io/v1/news/raw

Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters

Step 3: Narrow the feed (optional)

/v1/news/raw supports a small, fixed set of filters — only what exists at discovery time. Enrichment filters (title, language.code, category.*, entity.*, sentiment.*, …) are not available here.

  • source.id / ignore.source.id — include or exclude up to 3 source ids (comma-separated).
  • published_at — single-day window; or published_at.start / published_at.end for a range.
  • sort.by — id (default), published_at, or created_at. sort.order — desc (default) or asc.
bash
# Restrict the raw feed to two sources: --get moves the --data pairs into the URL query string.
curl --get "https://api.apitube.io/v1/news/raw" \
  --data "source.id=1024,2048" \
  --data "per_page=100" \
  --data "api_key=YOUR_API_KEY"
python
import requests

# Fetch up to 100 raw articles from sources 1024 and 2048 only.
response = requests.get(
    "https://api.apitube.io/v1/news/raw",
    params={
        "source.id": "1024,2048",
        "per_page": 100,
        "api_key": "YOUR_API_KEY",
    },
    # requests applies no timeout by default; without one a stalled
    # connection would block this script indefinitely.
    timeout=30,
)
# Raise on HTTP 4xx/5xx instead of printing an error payload as if it were data.
response.raise_for_status()
print(response.json())
javascript
// Request up to 100 raw articles from sources 1024 and 2048 only.
const params = new URLSearchParams({ "source.id": "1024,2048", "per_page": "100", "api_key": "YOUR_API_KEY" });
const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);

// fetch() only rejects on network failure; an HTTP error status still
// resolves, so it has to be checked explicitly before reading the body.
if (!response.ok) {
  throw new Error(`Raw articles request failed: ${response.status} ${response.statusText}`);
}

const data = await response.json();
console.log(data);
php
// Request up to 100 raw articles from sources 1024 and 2048 only.
$query = http_build_query(["source.id" => "1024,2048", "per_page" => 100, "api_key" => "YOUR_API_KEY"]);
$response = file_get_contents("https://api.apitube.io/v1/news/raw?$query");

// file_get_contents() returns false on failure; stop here rather than
// feeding false into json_decode().
if ($response === false) {
    throw new RuntimeException("Request to /v1/news/raw failed");
}

$data = json_decode($response, true);

// json_decode() returns null both for the JSON literal null and for invalid
// input, so json_last_error() is what distinguishes a parse failure.
if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
    throw new RuntimeException("Invalid JSON in response: " . json_last_error_msg());
}

print_r($data);
go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
)

// main requests up to 100 raw articles from sources 1024 and 2048 and prints
// the decoded JSON response.
//
// Every step that can fail is checked: in particular a failed http.Get
// returns a nil response, so deferring resp.Body.Close() before checking the
// error would panic with a nil pointer dereference.
func main() {
	u, err := url.Parse("https://api.apitube.io/v1/news/raw")
	if err != nil {
		panic(fmt.Errorf("parse endpoint URL: %w", err))
	}
	q := u.Query()
	q.Set("source.id", "1024,2048")
	q.Set("per_page", "100")
	q.Set("api_key", "YOUR_API_KEY")
	u.RawQuery = q.Encode()

	resp, err := http.Get(u.String())
	if err != nil {
		panic(fmt.Errorf("request raw articles: %w", err))
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(fmt.Errorf("read response body: %w", err))
	}

	// http.Get reports only transport errors; a non-2xx status arrives as a
	// normal response and must be rejected explicitly.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		panic(fmt.Errorf("unexpected status %s: %s", resp.Status, body))
	}

	var data map[string]any
	if err := json.Unmarshal(body, &data); err != nil {
		panic(fmt.Errorf("decode response JSON: %w", err))
	}
	fmt.Println(data)
}
java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Minimal example: sends one GET request for raw articles from two specific
 * sources and prints the response body as text.
 */
public class Example {
    /** Full request URL, including the source filter, page size and API key. */
    private static final String ENDPOINT =
        "https://api.apitube.io/v1/news/raw?source.id=1024,2048&per_page=100&api_key=YOUR_API_KEY";

    public static void main(String[] args) throws Exception {
        HttpRequest request = HttpRequest.newBuilder(URI.create(ENDPOINT)).GET().build();

        // Read the whole body as a String and echo it unchanged.
        HttpResponse<String> response =
            HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
text
Write a script in your preferred language that calls the APITube News API:

GET https://api.apitube.io/v1/news/raw?source.id=1024,2048&per_page=100

Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters

Need language, categories, entities, or sentiment? Let the article flow through the pipeline and read it back from /v1/news/everything, which serves the enriched, fully searchable archive.