-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmetadata_from_gear_output.py
executable file
·99 lines (80 loc) · 3.32 KB
/
metadata_from_gear_output.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
import os
import json
import datetime
# Lookup table mapping each known data type name to the list of file-name
# suffixes (extensions, including the leading dot) that identify it.
# Several suffixes overlap across types (e.g. ".dcm.zip" also ends in ".zip",
# ".qa.png" also ends in ".png"), so consumers must decide how to break ties.
data_types = {
    "bval": [".bval", ".bvals"],
    "bvec": [".bvec", ".bvecs"],
    "dicom": [".dcm", ".dcm.zip", ".dicom.zip"],
    "parrec": [".parrec.zip", ".par-rec.zip"],
    "gephysio": [".gephysio.zip"],
    "MATLAB data": [".mat"],
    "nifti": [".nii.gz", ".nii"],
    "pfile": [".7.gz", ".7"],
    "PsychoPy data": [".psydat"],
    "qa": [".qa.png", ".qa.json"],
    "archive": [
        ".zip",
        ".tbz2",
        ".tar.gz",
        ".tbz",
        ".tar.bz2",
        ".tgz",
        ".tar",
        ".txz",
        ".tar.xz",
    ],
    "document": [".docx", ".doc"],
    "image": [".jpg", ".tif", ".jpeg", ".gif", ".bmp", ".png", ".tiff"],
    "markup": [".html", ".htm"],
    "log": [".log"],
    "pdf": [".pdf"],
    "presentation": [".ppt", ".pptx"],
    "source code": [
        ".c",
        ".py",
        ".cpp",
        ".js",
        ".m",
        ".json",
        ".java",
        ".php",
        ".css",
    ],
    "spreadsheet": [".xls", ".xlsx"],
    "tabular data": [".csv.gz", ".csv"],
    "text": [".txt"],
    "video": [".mpeg", ".mpg", ".mov", ".mp4", ".m4v", ".mts"],
}
def _match_data_type(name, extension_map):
    """Return the data type whose extension is the longest suffix of `name`.

    Several types share trailing suffixes (".dcm.zip" vs ".zip", ".qa.png"
    vs ".png"), so the most specific (longest) matching extension wins rather
    than whichever type happens to be iterated last. When two types offer a
    matching extension of equal length, the first one iterated is kept.
    Returns the string 'None' when nothing matches.
    """
    best_type = 'None'
    best_len = 0
    for type_name in extension_map:
        for ext in extension_map[type_name]:
            if len(ext) > best_len and name.endswith(ext):
                best_type = type_name
                best_len = len(ext)
    return best_type


def meta_create(outbase, extension_map=None):
    """Write '.metadata.json' describing every file found under `outbase`.

    The directory tree is walked recursively; each file is recorded by its
    path relative to `outbase` together with a data type derived from its
    extension. The result is stored as
    {'acquisition': {'files': [{'name': ..., 'type': ...}, ...]}}.

    Args:
        outbase: Directory to scan. If it is not an existing directory, the
            gear output directory '/flywheel/v0/output' is used instead.
        extension_map: Optional dict mapping a type name to a list of file
            extensions. Defaults to the module-level `data_types` table.

    Returns:
        Path of the written '.metadata.json' file.

    Raises:
        IOError/OSError: if the metadata file cannot be opened for writing.
    """
    if extension_map is None:
        extension_map = data_types

    # Default to gear output directory
    if not os.path.isdir(outbase):
        outbase = '/flywheel/v0/output'

    # Collect every file below outbase, expressed relative to outbase.
    # NOTE(review): a '.metadata.json' left over from a previous run is
    # listed like any other file - confirm whether it should be skipped.
    output_files = [
        os.path.relpath(os.path.join(root, fname), outbase)
        for root, _dirs, fnames in os.walk(outbase)
        for fname in fnames
    ]

    # Pair each file name with its most specific matching data type
    files = [
        {'name': fname, 'type': _match_data_type(fname, extension_map)}
        for fname in output_files
    ]

    # Assemble final metadata
    metadata = {'acquisition': {'files': files}}

    # Write metadata file
    metadata_path = os.path.join(outbase, '.metadata.json')
    with open(metadata_path, 'w') as metafile:
        json.dump(metadata, metafile)

    return metadata_path
if __name__ == '__main__':
    # Given a directory ('outbase'), determine all file names and types by
    # mapping extensions to the data_types dict. Generate and write
    # '.metadata.json' in 'outbase'. Metadata will consist of filenames and
    # data types for all files in 'outbase'.
    #
    # Example usage:
    #   python metadata_from_gear_output.py /output/directory/ scitran/dtiinit
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument('outbase', help='Base directory to be scanned for output files')
    # nargs='?' makes the positional optional so the default actually applies;
    # without it argparse requires the argument and ignores `default`.
    ap.add_argument('gearname', nargs='?', default='gear', help='Name of running gear')
    args = ap.parse_args()

    metafile = meta_create(args.outbase)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    if os.path.isfile(metafile):
        print(args.gearname + ' generated %s' % metafile)
    else:
        print(args.gearname + ' Failed to create metadata.json')