Parse API Reference
将 URL、文件和 Storage 对象解析为标准化 Markdown 的接口。
当输入仍是原始内容，而输出需要 Markdown 与结构化元数据时，使用 Parse。下面的 API 示例以 AI 生成的 Python、JavaScript 和 Java 客户端代码展示。
| 方法 | 路径 | Scope | 作用 |
|---|---|---|---|
| GET | /v1/parse/engines | parse:read | 列出已注册解析引擎和支持场景。 |
| GET | /v1/parse/profiles | parse:read | 查看内部 parser profiles,供运维和调试使用。 |
| POST | /v1/parse/sync | parse:write | 同步解析一个或多个 sources。 |
| POST | /v1/parse/jobs | parse:write | 提交异步解析 job。 |
| GET | /v1/parse/jobs/{jobId}/result | parse:read | 读取已完成的异步解析结果。 |
查看引擎
```python
import os
import requests

BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")

def auth_headers():
    return {"Authorization": f"Bearer {TOKEN}"}

response = requests.get(f"{BASE_URL}/v1/parse/engines", headers=auth_headers())
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";

const authHeaders = {
  Authorization: `Bearer ${TOKEN}`,
};

const response = await fetch(`${BASE_URL}/v1/parse/engines`, {
  headers: authHeaders,
});
if (!response.ok) throw new Error(await response.text());
const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  static final String BASE_URL = System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN = System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/engines"))
        .header("Authorization", "Bearer " + TOKEN)
        .GET()
        .build();
    print(HTTP.send(request, HttpResponse.BodyHandlers.ofString()));
  }
}
```

同步解析
```python
import os
import requests

BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")

def auth_headers():
    return {"Authorization": f"Bearer {TOKEN}"}

payload = {
    "sources": [
        "https://docs.cognee.ai/core-concepts/overview"
    ],
    "engine_id": "auto"
}

response = requests.post(
    f"{BASE_URL}/v1/parse/sync",
    headers={**auth_headers(), "Content-Type": "application/json"},
    json=payload,
)
response.raise_for_status()
data = response.json()
print(data['results'][0]['document']['markdown'][:800]);
```

```javascript
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";

const authHeaders = {
  Authorization: `Bearer ${TOKEN}`,
};

const payload = {
  "sources": [
    "https://docs.cognee.ai/core-concepts/overview"
  ],
  "engine_id": "auto"
};

const response = await fetch(`${BASE_URL}/v1/parse/sync`, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) throw new Error(await response.text());
const data = await response.json();
console.log(data.results[0].document.markdown.slice(0, 800));
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  static final String BASE_URL = System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN = System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    String json = """
        {
          \"sources\": [
            \"https://docs.cognee.ai/core-concepts/overview\"
          ],
          \"engine_id\": \"auto\"
        }
        """;
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/sync"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();
    print(HTTP.send(request, HttpResponse.BodyHandlers.ofString()));
  }
}
```

常规路由使用 `engine_id: "auto"`。对比引擎或需要特定解析器时，可指定 crawl4ai、jina_reader、markitdown、llama_parse 或 docling。
异步解析 job
```python
import os
import requests

BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")

def auth_headers():
    return {"Authorization": f"Bearer {TOKEN}"}

payload = {
    "sources": [
        "https://docs.crawl4ai.com/advanced/advanced-features/"
    ],
    "engine_id": "crawl4ai",
    "priority": 5
}

response = requests.post(
    f"{BASE_URL}/v1/parse/jobs",
    headers={**auth_headers(), "Content-Type": "application/json"},
    json=payload,
)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";

const authHeaders = {
  Authorization: `Bearer ${TOKEN}`,
};

const payload = {
  "sources": [
    "https://docs.crawl4ai.com/advanced/advanced-features/"
  ],
  "engine_id": "crawl4ai",
  "priority": 5
};

const response = await fetch(`${BASE_URL}/v1/parse/jobs`, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) throw new Error(await response.text());
const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  static final String BASE_URL = System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN = System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    String json = """
        {
          \"sources\": [
            \"https://docs.crawl4ai.com/advanced/advanced-features/\"
          ],
          \"engine_id\": \"crawl4ai\",
          \"priority\": 5
        }
        """;
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/jobs"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();
    print(HTTP.send(request, HttpResponse.BodyHandlers.ofString()));
  }
}
```

异步响应会返回一个或多个 job_id。先轮询 `GET /v1/jobs/{jobId}`，完成后读取 `GET /v1/parse/jobs/{jobId}/result`。