Ingest Raw Articles in Near Real Time
The Raw Articles endpoint (/v1/news/raw) returns articles as they are discovered, before HTML parsing and NLP enrichment. Use it when you want the earliest possible access to incoming articles, or when you plan to run your own parsing and analysis instead of the enriched /v1/news/everything feed.
Only the last ~24 hours are available
This is a fast-churning staging feed — rows expire within roughly one day. Poll often enough that you never fall behind, and use /v1/news/everything for the full historical archive.
Step 1: Pull the newest raw articles
Sort by id descending (the default) to get the most recently discovered rows first. Keep per_page modest while you tune your loop (max 250).
curl "https://api.apitube.io/v1/news/raw?sort.by=id&sort.order=desc&per_page=50&api_key=YOUR_API_KEY"import requests
response = requests.get(
"https://api.apitube.io/v1/news/raw",
params={
"sort.by": "id",
"sort.order": "desc",
"per_page": 50,
"api_key": "YOUR_API_KEY",
},
)
print(response.json())const params = new URLSearchParams({ "sort.by": "id", "sort.order": "desc", "per_page": "50", "api_key": "YOUR_API_KEY" });
const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);
const data = await response.json();
console.log(data);$query = http_build_query(["sort.by" => "id", "sort.order" => "desc", "per_page" => 50, "api_key" => "YOUR_API_KEY"]);
$response = file_get_contents("https://api.apitube.io/v1/news/raw?$query");
$data = json_decode($response, true);
print_r($data);package main
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
)
func main() {
u, _ := url.Parse("https://api.apitube.io/v1/news/raw")
q := u.Query()
q.Set("sort.by", "id")
q.Set("sort.order", "desc")
q.Set("per_page", "50")
q.Set("api_key", "YOUR_API_KEY")
u.RawQuery = q.Encode()
resp, _ := http.Get(u.String())
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
var data map[string]any
json.Unmarshal(body, &data)
fmt.Println(data)
}import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
/**
 * Minimal example: requests the 50 most recently discovered raw articles
 * (sorted by id, descending) and prints the response body as text.
 */
public class Example {
    public static void main(String[] args) throws Exception {
        // The query string is already URL-safe, so it can be passed to URI.create as-is.
        URI endpoint = URI.create("https://api.apitube.io/v1/news/raw?sort.by=id&sort.order=desc&per_page=50&api_key=YOUR_API_KEY");
        HttpRequest request = HttpRequest.newBuilder(endpoint).GET().build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
Write a script in your preferred language that calls the APITube News API:
GET https://api.apitube.io/v1/news/raw?sort.by=id&sort.order=desc&per_page=50
Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters
Each row carries only what the source feed provided plus resolved publisher details — there is no language, category, topic, entity, industry, or sentiment data at this stage:
{
"id": 84512377,
"title": "AI advances reshape the chip industry in 2026",
"href": "https://example.com/ai-advances-2026",
"created_at": "2026-05-27 08:00:00",
"body": "The field of artificial intelligence continues to move quickly...",
"body_html": "<p>The field of artificial intelligence continues to move quickly...</p>",
"author": "Jane Doe",
"keywords": ["technology", "ai", "semiconductors"],
"source": { "id": 1024, "domain": "example.com", "bias": "center" }
}
Step 2: Poll without missing or re-processing rows
id is monotonically increasing, so track the highest id you have already processed and stop paging once you reach it. On the next tick, start again from page 1. This is more reliable than time-based windows because it survives clock skew and out-of-order created_at values.
import time
import requests
seen_max_id = 0
while True:
page = 1
batch = []
while True:
resp = requests.get(
"https://api.apitube.io/v1/news/raw",
params={"sort.by": "id", "sort.order": "desc", "per_page": 100, "page": page, "api_key": "YOUR_API_KEY"},
).json()
rows = resp.get("results", [])
fresh = [r for r in rows if r["id"] > seen_max_id]
batch.extend(fresh)
# stop paging once we hit already-seen rows or run out of pages
if len(fresh) < len(rows) or not resp.get("has_next_pages"):
break
page += 1
if batch:
seen_max_id = max(r["id"] for r in batch)
# hand `batch` to your own parser / queue here
print(f"ingested {len(batch)} new articles, cursor={seen_max_id}")
time.sleep(60)let seenMaxId = 0;
/**
 * Runs one polling tick against /v1/news/raw.
 *
 * Pages through the feed newest-first until it reaches a row whose id is at or
 * below the `seenMaxId` cursor (or the feed runs out of pages), then advances
 * the cursor to the highest id collected.
 *
 * Failures (network error, non-2xx status, invalid JSON) are logged and the
 * cursor is left untouched, so the next tick re-fetches the same rows.
 *
 * @returns {Promise<void>} Resolves when the tick is finished; never rejects.
 */
async function poll() {
  // setInterval fires on schedule even when the previous tick is still paging
  // (e.g. the first run walks the whole ~24h feed). Overlapping ticks would
  // read the same cursor and ingest the same rows twice, so skip this one.
  if (poll.isRunning) return;
  poll.isRunning = true;
  try {
    const batch = [];
    // New articles arriving while we page push older rows onto later pages,
    // so the same row can be returned twice within one tick.
    const batchIds = new Set();
    // Tracked incrementally: Math.max(...ids) throws a RangeError once the
    // batch holds more elements than the engine accepts as call arguments.
    let newestId = seenMaxId;
    let reachedCursor = false;

    for (let page = 1; !reachedCursor; page += 1) {
      const params = new URLSearchParams({ "sort.by": "id", "sort.order": "desc", "per_page": "100", "page": String(page), "api_key": "YOUR_API_KEY" });
      const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);
      // fetch() only rejects on network failure; HTTP errors must be checked.
      if (!response.ok) {
        throw new Error(`APITube responded with HTTP ${response.status}`);
      }
      const payload = await response.json();
      const rows = payload.results ?? [];

      for (const row of rows) {
        if (row.id <= seenMaxId) {
          reachedCursor = true;
          continue;
        }
        if (batchIds.has(row.id)) continue;
        batchIds.add(row.id);
        batch.push(row);
        if (row.id > newestId) newestId = row.id;
      }

      if (rows.length === 0 || !payload.has_next_pages) break;
    }

    if (batch.length > 0) {
      // hand `batch` to your own parser / queue here, then advance the cursor
      seenMaxId = newestId;
      console.log(`ingested ${batch.length} new articles, cursor=${seenMaxId}`);
    }
  } catch (err) {
    console.error("poll failed, will retry on the next tick:", err);
  } finally {
    poll.isRunning = false;
  }
}
setInterval(poll, 60_000);
Write a script in your preferred language that calls the APITube News API:
GET https://api.apitube.io/v1/news/raw
Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters
Step 3: Narrow the feed (optional)
/v1/news/raw supports a small, fixed set of filters — only what exists at discovery time. Enrichment filters (title, language.code, category.*, entity.*, sentiment.*, …) are not available here.
- source.id / ignore.source.id — include or exclude up to 3 source ids (comma-separated).
- published_at — single-day window; or published_at.start / published_at.end for a range.
- sort.by — id (default), published_at, or created_at; sort.order — desc (default) or asc.
curl "https://api.apitube.io/v1/news/raw?source.id=1024,2048&per_page=100&api_key=YOUR_API_KEY"import requests
response = requests.get(
"https://api.apitube.io/v1/news/raw",
params={
"source.id": "1024,2048",
"per_page": 100,
"api_key": "YOUR_API_KEY",
},
)
print(response.json())const params = new URLSearchParams({ "source.id": "1024,2048", "per_page": "100", "api_key": "YOUR_API_KEY" });
const response = await fetch(`https://api.apitube.io/v1/news/raw?${params}`);
const data = await response.json();
console.log(data);$query = http_build_query(["source.id" => "1024,2048", "per_page" => 100, "api_key" => "YOUR_API_KEY"]);
$response = file_get_contents("https://api.apitube.io/v1/news/raw?$query");
$data = json_decode($response, true);
print_r($data);package main
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
)
func main() {
u, _ := url.Parse("https://api.apitube.io/v1/news/raw")
q := u.Query()
q.Set("source.id", "1024,2048")
q.Set("per_page", "100")
q.Set("api_key", "YOUR_API_KEY")
u.RawQuery = q.Encode()
resp, _ := http.Get(u.String())
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
var data map[string]any
json.Unmarshal(body, &data)
fmt.Println(data)
}import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
/**
 * Minimal example: requests up to 100 raw articles restricted to source ids
 * 1024 and 2048 and prints the response body as text.
 */
public class Example {
    public static void main(String[] args) throws Exception {
        // The query string is already URL-safe, so it can be passed to URI.create as-is.
        URI endpoint = URI.create("https://api.apitube.io/v1/news/raw?source.id=1024,2048&per_page=100&api_key=YOUR_API_KEY");
        HttpRequest request = HttpRequest.newBuilder(endpoint).GET().build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
Write a script in your preferred language that calls the APITube News API:
GET https://api.apitube.io/v1/news/raw?source.id=1024,2048&per_page=100
Read the API key from an environment variable (do not hardcode it), handle request
errors, and print the key fields of each result.
Docs: https://docs.apitube.io/platform/news-api/parameters
Need language, categories, entities, or sentiment? Let the article flow through the pipeline and read it back from /v1/news/everything, which serves the enriched, fully searchable archive.