Skip to content

Commit ac99071

Browse files
committed
Atomize image
1 parent 674c936 commit ac99071

File tree

9 files changed

+287
-46
lines changed

9 files changed

+287
-46
lines changed

Cargo.lock

+196 -30
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

atomizer/Cargo.toml

+2 -1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ version = "0.1.0"
66

77
[dependencies]
88
atomic_lib = {version = "0.34.0", path = "../lib"}
9+
kamadak-exif = "0.5.5"
910
# Should match the version of pdf-extract
1011
mime_guess = "2.0.4"
11-
pdf-extract = {git = "https://github.com/Hessesian/pdf-extract"}
12+
pdf-extract = {path = "../../../joepio/pdf-extract"}

atomizer/src/file.rs

+24 -10
Original file line number | Diff line number | Diff line change
@@ -1,38 +1,52 @@
1-
use std::{collections::HashMap, error::Error};
1+
use std::{collections::HashMap, error::Error, io::Read};
22

33
use atomic_lib::resources::PropVals;
44
use mime_guess::Mime;
55

6-
use crate::pdf;
7-
86
pub struct File {
97
filename: String,
108
mime: Mime,
11-
bytes: Vec<u8>,
9+
reader: std::io::BufReader<std::fs::File>,
1210
}
1311

1412
impl File {
1513
pub fn open(filename: &str) -> Result<File, Box<dyn Error>> {
16-
let bytes = std::fs::read(filename)?;
14+
let bytes = std::fs::File::open(filename)?;
15+
let reader = std::io::BufReader::new(bytes);
1716
let mime = mime_guess::from_path(filename).first_or_octet_stream();
1817

1918
Ok(File {
2019
filename: filename.to_string(),
2120
mime,
22-
bytes,
21+
reader,
2322
})
2423
}
2524

2625
/// Creates property-value combinations based on the file's contents.
2726
/// Defaults to an empty HashMap if the file type is not supported.
28-
pub fn atomize(&self) -> PropVals {
27+
pub fn atomize(self) -> PropVals {
2928
match self.mime.to_string().as_str() {
30-
"application/pdf" => pdf::atomize(self),
29+
"application/pdf" => crate::pdf::atomize(self),
30+
"image/jpeg" => crate::image::atomize(self),
3131
_ => HashMap::new(),
3232
}
3333
}
3434

35-
pub fn bytes(&self) -> &[u8] {
36-
&self.bytes
35+
pub fn bytes(&mut self) -> Result<Vec<u8>, Box<dyn Error>> {
36+
let mut buffer = vec![];
37+
self.reader.read_to_end(&mut buffer)?;
38+
Ok(buffer)
39+
}
40+
41+
pub fn reader(&mut self) -> &mut std::io::BufReader<std::fs::File> {
42+
&mut self.reader
43+
}
44+
45+
pub fn mime(&self) -> &Mime {
46+
&self.mime
47+
}
48+
49+
pub fn filename(&self) -> &str {
50+
&self.filename
3751
}
3852
}

atomizer/src/image.rs

+60
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
use atomic_lib::resources::PropVals;
2+
use exif::{In, Tag};
3+
4+
const date_time: &str = "date_time";
5+
6+
fn map_tag(tag: Tag) -> String {
7+
match tag {
8+
Tag::PixelXDimension => "pixel_x_dimension",
9+
Tag::XResolution => "x_resolution",
10+
Tag::ImageDescription => "image_description",
11+
Tag::DateTime => date_time,
12+
_ => "unknown",
13+
}
14+
.to_string()
15+
}
16+
17+
/// Extracts selected EXIF fields (pixel X dimension, X resolution, image description, date/time) from an image file.
18+
pub fn atomize(mut file: crate::file::File) -> PropVals {
19+
let mut props = PropVals::new();
20+
21+
println!("Reading EXIF data from {}", file.filename());
22+
23+
let exif = exif::Reader::new()
24+
.read_from_container(file.reader())
25+
.unwrap();
26+
27+
let tag_list = [
28+
Tag::PixelXDimension,
29+
Tag::XResolution,
30+
Tag::ImageDescription,
31+
Tag::DateTime,
32+
];
33+
34+
for tag in tag_list {
35+
if let Some(field) = exif.get_field(tag, In::PRIMARY) {
36+
props.insert(
37+
map_tag(tag),
38+
atomic_lib::Value::String(field.display_value().to_string()),
39+
);
40+
println!("{}: {}", field.tag, field.display_value().with_unit(&exif));
41+
}
42+
}
43+
44+
props
45+
}
46+
47+
#[cfg(test)]
48+
mod tests {
49+
use super::*;
50+
use crate::file::File;
51+
52+
#[test]
53+
fn load_image() {
54+
let f = File::open("./test/image.jpg").unwrap();
55+
let propvals = f.atomize();
56+
let dt = propvals.get(date_time).unwrap();
57+
println!("Date: {}", dt);
58+
assert!(dt.to_string().contains("2008"));
59+
}
60+
}

atomizer/src/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
pub mod file;
2+
mod image;
23
mod pdf;
34

45
use atomic_lib::Resource;

atomizer/src/pdf.rs

+3 -4
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,10 @@ use atomic_lib::resources::PropVals;
33
const content_prop: &str = "content";
44

55
/// Extracts the text from a PDF file.
6-
pub fn atomize(file: &crate::file::File) -> PropVals {
6+
pub fn atomize(mut file: crate::file::File) -> PropVals {
77
let mut props = PropVals::new();
8-
let mut s = String::new();
9-
let mut output = pdf_extract::PlainTextOutput::new(&mut s);
10-
let text = pdf_extract::extract_text_mem(file.bytes()).unwrap();
8+
let bytes = file.bytes().unwrap();
9+
let text = pdf_extract::extract_text_from_mem(&bytes).unwrap();
1110
props.insert(content_prop.into(), atomic_lib::Value::String(text));
1211
props
1312
}

atomizer/test/image.jpg

147 KB
Loading

atomizer/test/simple.pdf

2.96 KB
Binary file not shown.

lib/src/populate.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use crate::{
99
parse::ParseOpts,
1010
schema::{Class, Property},
1111
storelike::Query,
12-
urls, Storelike, Value,
12+
urls, Resource, Storelike, Value,
1313
};
1414

1515
/// Populates a store with some of the most fundamental Properties and Classes needed to bootstrap the whole.

0 commit comments

Comments
 (0)