Parse 和 Knowledge
解析网页，创建数据集，摄入内容，构建知识并执行搜索。
这个流程从公开 URL 开始，将其解析为 Markdown，再把文本摄入知识数据集并执行搜索。下面的 API 片段是 AI 生成的 Python、JavaScript 和 Java 客户端示例。
解析 URL
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


payload = {
    "sources": ["https://docs.cognee.ai/core-concepts/overview"],
    "engine_id": "auto",
}

url = f"{BASE_URL}/v1/parse/sync"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()

# Show only the first 800 characters of the parsed Markdown.
markdown = data["results"][0]["document"]["markdown"]
print(markdown[:800])
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const payload = {
  sources: ["https://docs.cognee.ai/core-concepts/overview"],
  engine_id: "auto",
};

const url = `${BASE_URL}/v1/parse/sync`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
// Show only the first 800 characters of the parsed Markdown.
console.log(data.results[0].document.markdown.slice(0, 800));
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block.
    String json = """
        {
          "sources": [
            "https://docs.cognee.ai/core-concepts/overview"
          ],
          "engine_id": "auto"
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/sync"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

重型文档解析建议提交异步 Job：
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


payload = {
    "sources": ["https://docs.crawl4ai.com/advanced/advanced-features/"],
    "engine_id": "crawl4ai",
    "priority": 5,
}

url = f"{BASE_URL}/v1/parse/jobs"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const payload = {
  sources: ["https://docs.crawl4ai.com/advanced/advanced-features/"],
  engine_id: "crawl4ai",
  priority: 5,
};

const url = `${BASE_URL}/v1/parse/jobs`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block.
    String json = """
        {
          "sources": [
            "https://docs.crawl4ai.com/advanced/advanced-features/"
          ],
          "engine_id": "crawl4ai",
          "priority": 5
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/jobs"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

轮询通用 Job，并获取解析结果：
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


# "job_xxx" is a placeholder job id.
url = f"{BASE_URL}/v1/jobs/job_xxx"

response = requests.get(url, headers=auth_headers())
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

// "job_xxx" is a placeholder job id.
const url = `${BASE_URL}/v1/jobs/job_xxx`;
const response = await fetch(url, { headers: authHeaders });
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // "job_xxx" is a placeholder job id.
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/jobs/job_xxx"))
        .header("Authorization", "Bearer " + TOKEN)
        .GET()
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


# "job_xxx" is a placeholder job id.
url = f"{BASE_URL}/v1/parse/jobs/job_xxx/result"

response = requests.get(url, headers=auth_headers())
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

// "job_xxx" is a placeholder job id.
const url = `${BASE_URL}/v1/parse/jobs/job_xxx/result`;
const response = await fetch(url, { headers: authHeaders });
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // "job_xxx" is a placeholder job id.
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/parse/jobs/job_xxx/result"))
        .header("Authorization", "Bearer " + TOKEN)
        .GET()
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

上传小文件
```python
import json
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


metadata = {"source": "storage-quickstart", "document_type": "guide"}
access_policy = {"access_level": "tenant_shared"}

# Create a small local Markdown file to upload.
with open("cortex-storage-quickstart.md", "w", encoding="utf-8") as f:
    f.write("# Cortex Storage\n\nThis file was uploaded through Cortex.")

# No explicit Content-Type header: requests builds the multipart body and
# its boundary from `files` / `data`.
with open("cortex-storage-quickstart.md", "rb") as f:
    response = requests.post(
        f"{BASE_URL}/v1/storage/files",
        headers=auth_headers(),
        files={"file": ("cortex-storage-quickstart.md", f, "text/markdown")},
        data={
            # Structured fields are sent as JSON-encoded form values.
            "metadata_json": json.dumps(metadata),
            "access_policy_json": json.dumps(access_policy),
            "tags": "quickstart,docs",
        },
    )
response.raise_for_status()
print(response.json())
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = {
  Authorization: `Bearer ${TOKEN}`,
};

const form = new FormData();
form.append(
  "file",
  new Blob(["# Cortex Storage\n\nThis file was uploaded through Cortex."], {
    type: "text/markdown",
  }),
  "cortex-storage-quickstart.md",
);
// Structured fields are sent as JSON-encoded form values.
form.append(
  "metadata_json",
  JSON.stringify({ source: "storage-quickstart", document_type: "guide" }),
);
form.append("access_policy_json", JSON.stringify({ access_level: "tenant_shared" }));
form.append("tags", "quickstart,docs");

// No explicit Content-Type header: fetch derives it (with boundary) from the FormData body.
const response = await fetch(`${BASE_URL}/v1/storage/files`, {
  method: "POST",
  headers: authHeaders,
  body: form,
});
if (!response.ok) throw new Error(await response.text());
console.log(await response.json());
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // The multipart body is assembled by hand; each part is delimited by the boundary.
    String boundary = "----CortexBoundary" + System.currentTimeMillis();
    String body = ""
        + "--" + boundary + "\r\n"
        + "Content-Disposition: form-data; name=\"file\"; filename=\"cortex-storage-quickstart.md\"\r\n"
        + "Content-Type: text/markdown\r\n\r\n"
        + "# Cortex Storage\n\nThis file was uploaded through Cortex.\r\n"
        + "--" + boundary + "\r\n"
        + "Content-Disposition: form-data; name=\"metadata_json\"\r\n\r\n"
        + "{\"source\":\"storage-quickstart\",\"document_type\":\"guide\"}\r\n"
        + "--" + boundary + "\r\n"
        + "Content-Disposition: form-data; name=\"access_policy_json\"\r\n\r\n"
        + "{\"access_level\":\"tenant_shared\"}\r\n"
        + "--" + boundary + "\r\n"
        + "Content-Disposition: form-data; name=\"tags\"\r\n\r\n"
        + "quickstart,docs\r\n"
        + "--" + boundary + "--\r\n";

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/storage/files"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "multipart/form-data; boundary=" + boundary)
        .POST(HttpRequest.BodyPublishers.ofString(body))
        .build();

    print(HTTP.send(request, HttpResponse.BodyHandlers.ofString()));
  }
}
```

返回的 object_id 可用于 Parse、Knowledge、Evaluation 或 Synthesis。
创建数据集
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


payload = {
    "dataset_key": "quickstart_knowledge_demo",
    "display_name": "Quickstart Knowledge Demo",
    "description": "Small Cortex Knowledge dataset",
    "tags": ["quickstart", "knowledge"],
    "retention_class": "temporary",
    "access_policy": {"access_level": "tenant_shared"},
}

url = f"{BASE_URL}/v1/knowledge/datasets"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const payload = {
  dataset_key: "quickstart_knowledge_demo",
  display_name: "Quickstart Knowledge Demo",
  description: "Small Cortex Knowledge dataset",
  tags: ["quickstart", "knowledge"],
  retention_class: "temporary",
  access_policy: { access_level: "tenant_shared" },
};

const url = `${BASE_URL}/v1/knowledge/datasets`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block.
    String json = """
        {
          "dataset_key": "quickstart_knowledge_demo",
          "display_name": "Quickstart Knowledge Demo",
          "description": "Small Cortex Knowledge dataset",
          "tags": [
            "quickstart",
            "knowledge"
          ],
          "retention_class": "temporary",
          "access_policy": {
            "access_level": "tenant_shared"
          }
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/knowledge/datasets"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

摄入内联文本
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


text_input = {
    "input_type": "text",
    "text": "# Cortex APIs\nCortex provides Parse, Storage, Knowledge, Evaluation, and Synthesis APIs.",
    "label": "Cortex API note",
    "node_set": ["quickstart", "docs"],
    "metadata": {"source": "manual", "document_type": "note"},
}

payload = {
    "dataset_key": "quickstart_knowledge_demo",
    "inputs": [text_input],
    "options": {
        "normalize_text": True,
        "structured_ingest": True,
        "incremental": True,
        "persist_source_copy": False,
    },
}

url = f"{BASE_URL}/v1/knowledge/add/jobs"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const textInput = {
  input_type: "text",
  text: "# Cortex APIs\nCortex provides Parse, Storage, Knowledge, Evaluation, and Synthesis APIs.",
  label: "Cortex API note",
  node_set: ["quickstart", "docs"],
  metadata: { source: "manual", document_type: "note" },
};

const payload = {
  dataset_key: "quickstart_knowledge_demo",
  inputs: [textInput],
  options: {
    normalize_text: true,
    structured_ingest: true,
    incremental: true,
    persist_source_copy: false,
  },
};

const url = `${BASE_URL}/v1/knowledge/add/jobs`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block. The "\\n" stays escaped so the
    // JSON carries the two-character \n escape rather than a raw line break.
    String json = """
        {
          "dataset_key": "quickstart_knowledge_demo",
          "inputs": [
            {
              "input_type": "text",
              "text": "# Cortex APIs\\nCortex provides Parse, Storage, Knowledge, Evaluation, and Synthesis APIs.",
              "label": "Cortex API note",
              "node_set": [
                "quickstart",
                "docs"
              ],
              "metadata": {
                "source": "manual",
                "document_type": "note"
              }
            }
          ],
          "options": {
            "normalize_text": true,
            "structured_ingest": true,
            "incremental": true,
            "persist_source_copy": false
          }
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/knowledge/add/jobs"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

启动或等待 cortex-knowledge-worker 消费任务，然后通过 Job API 查看完成状态。
构建图谱
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


payload = {
    "dataset_key": "quickstart_knowledge_demo",
    "incremental_loading": True,
    "graph_prompt_profile": "simple",
    "chunking": {
        "enabled": True,
        "strategy": "semantic",
        "target_tokens": 384,
        "overlap_tokens": 48,
        "max_chunks": 64,
    },
}

url = f"{BASE_URL}/v1/knowledge/cognify/jobs"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const payload = {
  dataset_key: "quickstart_knowledge_demo",
  incremental_loading: true,
  graph_prompt_profile: "simple",
  chunking: {
    enabled: true,
    strategy: "semantic",
    target_tokens: 384,
    overlap_tokens: 48,
    max_chunks: 64,
  },
};

const url = `${BASE_URL}/v1/knowledge/cognify/jobs`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block.
    String json = """
        {
          "dataset_key": "quickstart_knowledge_demo",
          "incremental_loading": true,
          "graph_prompt_profile": "simple",
          "chunking": {
            "enabled": true,
            "strategy": "semantic",
            "target_tokens": 384,
            "overlap_tokens": 48,
            "max_chunks": 64
          }
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/knowledge/cognify/jobs"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```

搜索
```python
import os

import requests

# Endpoint and credentials are read from the environment, with local defaults.
BASE_URL = os.getenv("CORTEX_URL", "http://127.0.0.1:8080")
TOKEN = os.getenv("CORTEX_TOKEN", "replace_with_token")


def auth_headers():
    """Return the bearer-token Authorization header."""
    return {"Authorization": f"Bearer {TOKEN}"}


payload = {
    "query_text": "What APIs does Cortex expose?",
    "dataset_keys": ["quickstart_knowledge_demo"],
    "search_type": "GRAPH_COMPLETION",
    "top_k": 5,
    "only_context": False,
    "include_provenance": True,
    "include_graph_paths": True,
    "timeout_seconds": 30,
}

url = f"{BASE_URL}/v1/knowledge/search"
headers = {**auth_headers(), "Content-Type": "application/json"}

response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
print(data)
```

```javascript
// Endpoint and credentials are read from the environment, with local defaults.
const BASE_URL = process.env.CORTEX_URL ?? "http://127.0.0.1:8080";
const TOKEN = process.env.CORTEX_TOKEN ?? "replace_with_token";
const authHeaders = { Authorization: `Bearer ${TOKEN}` };

const payload = {
  query_text: "What APIs does Cortex expose?",
  dataset_keys: ["quickstart_knowledge_demo"],
  search_type: "GRAPH_COMPLETION",
  top_k: 5,
  only_context: false,
  include_provenance: true,
  include_graph_paths: true,
  timeout_seconds: 30,
};

const url = `${BASE_URL}/v1/knowledge/search`;
const response = await fetch(url, {
  method: "POST",
  headers: { ...authHeaders, "Content-Type": "application/json" },
  body: JSON.stringify(payload),
});
if (!response.ok) {
  throw new Error(await response.text());
}

const data = await response.json();
console.log(data);
```

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CortexExample {
  // Endpoint and credentials are read from the environment, with local defaults.
  static final String BASE_URL =
      System.getenv().getOrDefault("CORTEX_URL", "http://127.0.0.1:8080");
  static final String TOKEN =
      System.getenv().getOrDefault("CORTEX_TOKEN", "replace_with_token");
  static final HttpClient HTTP = HttpClient.newHttpClient();

  /** Prints the HTTP status code followed by the response body. */
  static void print(HttpResponse<String> response) {
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }

  public static void main(String[] args) throws Exception {
    // Quotes need no escaping inside a text block.
    String json = """
        {
          "query_text": "What APIs does Cortex expose?",
          "dataset_keys": [
            "quickstart_knowledge_demo"
          ],
          "search_type": "GRAPH_COMPLETION",
          "top_k": 5,
          "only_context": false,
          "include_provenance": true,
          "include_graph_paths": true,
          "timeout_seconds": 30
        }
        """;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(BASE_URL + "/v1/knowledge/search"))
        .header("Authorization", "Bearer " + TOKEN)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(json))
        .build();

    HttpResponse<String> response = HTTP.send(request, HttpResponse.BodyHandlers.ofString());
    print(response);
  }
}
```