Commit: add File::resolver
s3bk committed Jul 30, 2023
1 parent 284d81c commit 0e5218d
Showing 6 changed files with 114 additions and 66 deletions.
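In short: `File` and `Storage` no longer implement `Resolve` themselves; callers obtain a short-lived resolver from the new `File::resolver()` method and pass it wherever `&file` used to be passed as `&impl Resolve`. A minimal sketch of the new call pattern (the path and page number are illustrative, not from the commit):

    use pdf::file::FileOptions;

    fn main() {
        // Open a document as in the examples below (path is hypothetical).
        let file = FileOptions::cached().open("example.pdf").unwrap();
        // New in this commit: a resolver that borrows the file's storage.
        let resolver = file.resolver();

        // APIs that previously took `&file` now take `&resolver`, e.g.
        // `img.raw_image_data(&resolver)` or `dests.walk(&resolver, ..)`.
        // Convenience methods remain on `file` itself:
        let _page = file.get_page(0).unwrap();
        let _ = &resolver;
    }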
1 change: 1 addition & 0 deletions pdf/examples/content.rs
@@ -37,6 +37,7 @@ fn main() -> Result<(), PdfError> {
         right: 400.0
     });
     let resources = Resources::default();
+
     /*
     let font = Font {
         name: Some("Test".into()),
4 changes: 3 additions & 1 deletion pdf/examples/metadata.rs
@@ -11,13 +11,15 @@ fn main() -> Result<(), PdfError> {
         .expect("Please provide a file path to the PDF you want to explore.");

     let file = FileOptions::cached().open(&path).unwrap();
+    let resolver = file.resolver();
+
     if let Some(ref info) = file.trailer.info_dict {
         dbg!(info);
     }

     if let Some(ref forms) = file.get_root().forms {
         for field in forms.fields.iter() {
-            print_field(field, &file);
+            print_field(field, &resolver);
         }
     }

9 changes: 5 additions & 4 deletions pdf/examples/names.rs
@@ -65,6 +65,7 @@ fn main() {
     println!("read: {}", path);

     let file = FileOptions::cached().open(&path).unwrap();
+    let resolver = file.resolver();
     let catalog = file.get_root();

     let mut pages_map: HashMap<String, PlainRef> = HashMap::new();
@@ -81,7 +82,7 @@

     if let Some(ref names) = catalog.names {
         if let Some(ref dests) = names.dests {
-            dests.walk(&file, &mut dests_cb).unwrap();
+            dests.walk(&resolver, &mut dests_cb).unwrap();
         }
     }

@@ -100,7 +101,7 @@
             }
         }
     }
-    add_tree(&file, &mut pages, &catalog.pages, &mut 0);
+    add_tree(&resolver, &mut pages, &catalog.pages, &mut 0);

     let get_page_nr = |name: &str| -> usize {
         let page = pages_map[name];
@@ -112,8 +113,8 @@

     if let Some(ref outlines) = catalog.outlines {
         if let Some(entry_ref) = outlines.first {
-            let entry = file.get(entry_ref).unwrap();
-            walk_outline(&file, entry, &get_page_nr, &page_nr, 0);
+            let entry = resolver.get(entry_ref).unwrap();
+            walk_outline(&resolver, entry, &get_page_nr, &page_nr, 0);
         }
     }

21 changes: 16 additions & 5 deletions pdf/examples/read.rs
@@ -5,19 +5,30 @@ use std::time::SystemTime;
 use std::fs;
 use std::collections::HashMap;

-use pdf::file::{FileOptions};
+use pdf::file::{FileOptions, Log};
 use pdf::object::*;
 use pdf::primitive::Primitive;
 use pdf::error::PdfError;
 use pdf::enc::StreamFilter;

+struct VerboseLog;
+impl Log for VerboseLog {
+    fn load_object(&self, r: PlainRef) {
+        println!("load {r:?}");
+    }
+    fn log_get(&self, r: PlainRef) {
+        println!("get {r:?}");
+    }
+}
+
 fn main() -> Result<(), PdfError> {
     let path = args().nth(1).expect("no file given");
     println!("read: {}", path);
     let now = SystemTime::now();

-    let file = FileOptions::cached().open(&path).unwrap();
+    let file = FileOptions::cached().log(VerboseLog).open(&path).unwrap();
+    let resolver = file.resolver();

     if let Some(ref info) = file.trailer.info_dict {
         let title = info.title.as_ref().map(|p| p.to_string_lossy());
         let author = info.author.as_ref().map(|p| p.to_string_lossy());
@@ -44,7 +55,7 @@ fn main() -> Result<(), PdfError> {
         };
         fonts.insert(name, font.clone());
     }
-    images.extend(resources.xobjects.iter().map(|(_name, &r)| file.get(r).unwrap())
+    images.extend(resources.xobjects.iter().map(|(_name, &r)| resolver.get(r).unwrap())
         .filter(|o| matches!(**o, XObject::Image(_)))
     );
 }
@@ -54,7 +65,7 @@ fn main() -> Result<(), PdfError> {
         XObject::Image(ref im) => im,
         _ => continue
     };
-    let (data, filter) = img.raw_image_data(&file)?;
+    let (data, filter) = img.raw_image_data(&resolver)?;
     let ext = match filter {
         Some(StreamFilter::DCTDecode(_)) => "jpeg",
         Some(StreamFilter::JBIG2Decode) => "jbig2",
@@ -72,7 +83,7 @@ fn main() -> Result<(), PdfError> {

 for (name, font) in fonts.iter() {
     let fname = format!("font_{}", name);
-    if let Some(Ok(data)) = font.embedded_data(&file) {
+    if let Some(Ok(data)) = font.embedded_data(&resolver) {
         fs::write(fname.as_str(), data).unwrap();
         println!("Wrote file {}", fname);
     }
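Besides swapping `&file` for `&resolver`, read.rs now demonstrates the `Log` hooks: `FileOptions::cached().log(VerboseLog)` installs the logger, and the resolver calls `load_object` on every reference resolution and `log_get` on every `get` (both default to no-ops, as the `pdf/src/file.rs` changes below show).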
139 changes: 85 additions & 54 deletions pdf/src/file.rs
@@ -1,7 +1,8 @@
 //! This is kind of the entry-point of the type-safe PDF functionality.
+use std::cell::RefCell;
[CI annotation — GitHub Actions (tests, clippy), line 2: warning: unused import: `std::cell::RefCell`]

 use std::marker::PhantomData;
 use std::collections::HashMap;
-use std::sync::{Arc};
+use std::sync::{Arc, Mutex};
 use std::path::Path;
 use std::io::Write;

@@ -56,6 +57,7 @@ impl<T: Clone + ValueSize + Send + 'static> Cache<T> for Arc<SyncCache<PlainRef,

 pub trait Log {
     fn load_object(&self, r: PlainRef) {}
[CI annotation — clippy, line 59: warning: unused variable: `r`; prefix with an underscore if intentional]
+    fn log_get(&self, r: PlainRef) {}
[CI annotation — clippy, line 60: warning: unused variable: `r`]
 }
 pub struct NoLog;
 impl Log for NoLog {}
@@ -143,7 +145,9 @@
 }

     pub fn load_storage_and_trailer_password(&mut self, password: &[u8]) -> Result<Dictionary> {
-        let (refs, trailer) = t!(self.backend.read_xref_table_and_trailer(self.start_offset, self));
+
+        let resolver = StorageResolver::new(self);
+        let (refs, trailer) = t!(self.backend.read_xref_table_and_trailer(self.start_offset, &resolver));
         self.refs = refs;

         if let Some(crypt) = trailer.get("Encrypt") {
@@ -156,13 +160,17 @@
                 .as_array()?[0]
                 .as_string()?
                 .as_bytes();
-            let dict = CryptDict::from_primitive(crypt.clone(), self)?;
+
+            let resolver = StorageResolver::new(self);
+            let dict = CryptDict::from_primitive(crypt.clone(), &resolver)?;
+
             self.decoder = Some(t!(Decoder::from_password(&dict, key, password)));
             if let Primitive::Reference(reference) = crypt {
                 self.decoder.as_mut().unwrap().encrypt_indirect_object = Some(*reference);
             }
             if let Some(Primitive::Reference(catalog_ref)) = trailer.get("Root") {
-                let catalog = t!(t!(self.resolve(*catalog_ref)).resolve(self)?.into_dictionary());
+                let resolver = StorageResolver::new(self);
+                let catalog = t!(t!(resolver.resolve(*catalog_ref)).resolve(&resolver)?.into_dictionary());
                 if let Some(Primitive::Reference(metadata_ref)) = catalog.get("Metadata") {
                     self.decoder.as_mut().unwrap().metadata_indirect_object = Some(*metadata_ref);
                 }
@@ -182,10 +190,11 @@
         Ok(())
     }

+        let resolver = StorageResolver::new(self);
         std::iter::from_fn(move || {
             loop {
                 let pos = lexer.get_pos();
-                match parse_indirect_object(&mut lexer, self, self.decoder.as_ref(), ParseFlags::all()) {
+                match parse_indirect_object(&mut lexer, &resolver, self.decoder.as_ref(), ParseFlags::all()) {
                     Ok((r, p)) => return Some(Ok(ScanItem::Object(r, p))),
                     Err(e) if e.is_eof() => return None,
                     Err(e) => {
@@ -212,56 +221,92 @@
             }
         })
     }
-}

-pub enum ScanItem {
-    Object(PlainRef, Primitive),
-    Trailer(Dictionary)
-}
-
-impl<B, OC, SC, L> Resolve for Storage<B, OC, SC, L>
-where
-    B: Backend,
-    OC: Cache<Result<AnySync, Arc<PdfError>>>,
-    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
-    L: Log
-{
-    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
-        self.log.load_object(r);
-
+    fn resolve_ref(&self, r: PlainRef, flags: ParseFlags, resolve: &impl Resolve) -> Result<Primitive> {
         match self.changes.get(&r.id) {
             Some(p) => Ok((*p).clone()),
             None => match t!(self.refs.get(r.id)) {
                 XRef::Raw {pos, ..} => {
                     let mut lexer = Lexer::with_offset(t!(self.backend.read(self.start_offset + pos ..)), self.start_offset + pos);
-                    let p = t!(parse_indirect_object(&mut lexer, self, self.decoder.as_ref(), flags)).1;
+                    let p = t!(parse_indirect_object(&mut lexer, resolve, self.decoder.as_ref(), flags)).1;
                     Ok(p)
                 }
                 XRef::Stream {stream_id, index} => {
                     if !flags.contains(ParseFlags::STREAM) {
                         return Err(PdfError::PrimitiveNotAllowed { found: ParseFlags::STREAM, allowed: flags });
                     }
-                    if depth == 0 {
-                        bail!("too deep");
-                    }
                     // use get to cache the object stream
-                    let obj_stream = self.get::<ObjectStream>(Ref::from_id(stream_id))?;
+                    let obj_stream = resolve.get::<ObjectStream>(Ref::from_id(stream_id))?;

-                    let (data, range) = t!(obj_stream.get_object_slice(index, self));
+                    let (data, range) = t!(obj_stream.get_object_slice(index, resolve));
                     let slice = data.get(range.clone()).ok_or_else(|| other!("invalid range {:?}, but only have {} bytes", range, data.len()))?;
-                    parse(slice, self, flags)
+                    parse(slice, resolve, flags)
                 }
                 XRef::Free {..} => err!(PdfError::FreeObject {obj_nr: r.id}),
                 XRef::Promised => unimplemented!(),
                 XRef::Invalid => err!(PdfError::NullRef {obj_nr: r.id}),
             }
         }
     }
+}

+pub enum ScanItem {
+    Object(PlainRef, Primitive),
+    Trailer(Dictionary)
+}
+
+struct StorageResolver<'a, B, OC, SC, L> {
+    storage: &'a Storage<B, OC, SC, L>,
+    chain: Mutex<Vec<PlainRef>>,
+}
+impl<'a, B, OC, SC, L> StorageResolver<'a, B, OC, SC, L> {
+    pub fn new(storage: &'a Storage<B, OC, SC, L>) -> Self {
+        StorageResolver {
+            storage,
+            chain: Mutex::new(vec![])
+        }
+    }
+}
+
+struct Defer<F: FnMut()>(F);
+impl<F: FnMut()> Drop for Defer<F> {
+    fn drop(&mut self) {
+        (self.0)();
+    }
+}
+
+impl<'a, B, OC, SC, L> Resolve for StorageResolver<'a, B, OC, SC, L>
+where
+    B: Backend,
+    OC: Cache<Result<AnySync, Arc<PdfError>>>,
+    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
+    L: Log
+{
+    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
[CI annotation — clippy, line 284: warning: unused variable: `depth`; prefix with an underscore if intentional]
+        let storage = self.storage;
+        storage.log.load_object(r);
+
+        storage.resolve_ref(r, flags, self)
+    }

     fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
         let key = r.get_inner();
+        self.storage.log.log_get(key);
+
+        {
+            println!("enter {key:?}");
+            let mut chain = self.chain.lock().unwrap();
+            if chain.contains(&key) {
+                bail!("Recursive reference");
+            }
+            chain.push(key);
+        }
+        let _defer = Defer(|| {
+            println!("exit {key:?}");
+            let mut chain = self.chain.lock().unwrap();
+            assert_eq!(chain.pop(), Some(key));
+        });

-        let res = self.cache.get_or_compute(key, || {
+        let res = self.storage.cache.get_or_compute(key, || {
             match self.resolve(key).and_then(|p| T::from_primitive(p, self)) {
                 Ok(obj) => Ok(Shared::new(obj).into()),
                 Err(e) => Err(Arc::new(e)),
@@ -273,10 +318,10 @@
         }
     }
     fn options(&self) -> &ParseOptions {
-        &self.options
+        &self.storage.options
     }
     fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
-        self.stream_cache.get_or_compute(id, || self.decode(id, range, filters).map_err(Arc::new))
+        self.storage.stream_cache.get_or_compute(id, || self.storage.decode(id, range, filters).map_err(Arc::new))
             .map_err(|e| e.into())
     }
 }
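The new `get` above pairs a per-resolver `chain: Mutex<Vec<PlainRef>>` with the `Defer` drop guard: a reference is pushed before it is resolved and popped again when the guard drops, so a self-referential object now fails with "Recursive reference" instead of recursing until the stack overflows. A self-contained sketch of the drop-guard idea (illustrative only, not crate code):

    // Runs its closure when dropped: on normal exit, early return, or panic.
    struct Defer<F: FnMut()>(F);
    impl<F: FnMut()> Drop for Defer<F> {
        fn drop(&mut self) {
            (self.0)();
        }
    }

    fn main() {
        let _guard = Defer(|| println!("exit"));
        println!("enter");
    } // prints "enter", then "exit" as `_guard` drops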
@@ -390,26 +435,6 @@ pub struct File<B, OC, SC, L> {
     storage: Storage<B, OC, SC, L>,
     pub trailer: Trailer,
 }
-impl<B, OC, SC, L> Resolve for File<B, OC, SC, L>
-where
-    B: Backend,
-    OC: Cache<Result<AnySync, Arc<PdfError>>>,
-    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
-    L: Log,
-{
-    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
-        self.storage.resolve_flags(r, flags, depth)
-    }
-    fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
-        self.storage.get(r)
-    }
-    fn options(&self) -> &ParseOptions {
-        self.storage.options()
-    }
-    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
-        self.storage.get_data_or_decode(id, range, filters)
-    }
-}
 impl<B, OC, SC, L> Updater for File<B, OC, SC, L>
 where
     B: Backend,
@@ -539,15 +564,20 @@
     fn load_data(backend: B, password: &[u8], options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
         let mut storage = Storage::with_cache(backend, options, object_cache, stream_cache, log)?;
         let trailer = storage.load_storage_and_trailer_password(password)?;
+
+        let resolver = StorageResolver::new(&storage);
         let trailer = t!(Trailer::from_primitive(
             Primitive::Dictionary(trailer),
-            &storage,
+            &resolver,
         ));
         Ok(File { storage, trailer })
     }
     pub fn new(storage: Storage<B, OC, SC, L>, trailer: Trailer) -> Self {
         File { storage, trailer }
     }
+    pub fn resolver(&self) -> impl Resolve + '_ {
+        StorageResolver::new(&self.storage)
+    }

     pub fn get_root(&self) -> &Catalog {
         &self.trailer.root
@@ -561,7 +591,8 @@
     }

     pub fn get_page(&self, n: u32) -> Result<PageRc> {
-        self.trailer.root.pages.page(self, n)
+        let resolver = StorageResolver::new(&self.storage);
+        self.trailer.root.pages.page(&resolver, n)
     }

     pub fn update_catalog(&mut self, catalog: Catalog) -> Result<()> {
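Net effect of the `pdf/src/file.rs` changes: `File` and `Storage` no longer implement `Resolve` themselves. All resolution goes through the short-lived `StorageResolver`, which borrows the storage, delegates to its caches and parse options, and carries its own reference chain for cycle detection; `Storage::resolve_ref` takes the calling resolver as a parameter so nested lookups are routed back through that chain. Each call to `File::resolver()` starts a fresh, empty chain.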
6 changes: 4 additions & 2 deletions pdf/tests/integration.rs
@@ -127,10 +127,12 @@ fn invalid_pdfs() {
 fn parse_objects_from_stream() {
     use pdf::object::NoResolve;
     let file = run!(FileOptions::cached().open(file_path!("xelatex.pdf")));
+    let resolver = file.resolver();
+
     // .. we know that object 13 of that file is an ObjectStream
-    let obj_stream: RcRef<ObjectStream> = run!(file.get(Ref::new(PlainRef {id: 13, gen: 0})));
+    let obj_stream: RcRef<ObjectStream> = run!(resolver.get(Ref::new(PlainRef {id: 13, gen: 0})));
     for i in 0..obj_stream.n_objects() {
-        let (data, range) = run!(obj_stream.get_object_slice(i, &file));
+        let (data, range) = run!(obj_stream.get_object_slice(i, &resolver));
         let slice = &data[range];
         println!("Object slice #{}: {}\n", i, str::from_utf8(slice).unwrap());
         run!(parse(slice, &NoResolve, ParseFlags::ANY));
