Skip to content

Rust API

Parse a PDF file and return a structured Document.

use std::path::Path;
use edgeparse_core::api::config::ProcessingConfig;
use edgeparse_core::convert;
let config = ProcessingConfig::default();
let doc = convert(Path::new("document.pdf"), &config)?;
println!("Pages: {}", doc.number_of_pages);
println!("Elements: {}", doc.kids.len());
| Field | Type | Description |
| --- | --- | --- |
| `file_name` | `String` | Source file name |
| `number_of_pages` | `usize` | Total pages |
| `author` | `Option<String>` | PDF author metadata |
| `title` | `Option<String>` | PDF title metadata |
| `kids` | `Vec<ContentElement>` | Extracted elements in reading order |
use edgeparse_core::output;
// Markdown
let md = output::markdown::to_markdown(&doc)?;
// HTML
let html = output::html::to_html(&doc)?;
// JSON (legacy-compatible)
let json = output::legacy_json::to_legacy_json_string(&doc, "document")?;

Low-level PDF COS object parser. Used internally by edgeparse-core.

[dependencies]
edgeparse-core = "0.1"