Skip to content

Commit 299f6f4

Browse files
committed
minor updates to metadata parsing
1 parent 81b1e1e commit 299f6f4

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

rocrate/provenance_profile.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from prov.identifier import Identifier
1818
from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity
19-
from tools.load_ga_export import load_ga_history_export, GalaxyJob
19+
from tools.load_ga_export import load_ga_history_export, GalaxyJob, GalaxyDataset
2020
from ast import literal_eval
2121
import os
2222

@@ -109,14 +109,30 @@ def __init__(
109109
# move to separate function
110110
metadata_export = load_ga_history_export(ga_export)
111111
self.generate_prov_doc()
112-
self.jobs = []
112+
113+
self.datasets = {}
113114
# print(metadata_export["jobs_attrs"][0]["params"])
114-
for job in metadata_export["jobs_attrs"]:
115+
for i,dataset in enumerate(metadata_export["datasets_attrs"]):
116+
datasets_attrs = GalaxyDataset()
117+
datasets_attrs.parse_ga_dataset_attrs(dataset)
118+
print(i)
119+
print(datasets_attrs.attributes['encoded_id'])
120+
self.datasets[datasets_attrs.attributes['encoded_id']] = datasets_attrs.attributes
121+
# self.declare_process(ds_attrs.attributes)
122+
123+
self.jobs = {}
124+
for i,job in enumerate(metadata_export["jobs_attrs"]):
115125
job_attrs = GalaxyJob()
116126
job_attrs.parse_ga_jobs_attrs(job)
117-
self.jobs.append(job_attrs.attributes)
127+
print(i)
128+
print(job_attrs.attributes.keys())
129+
# for k,v in job_attrs.attributes['parameters'].items():
130+
# print(k, " : ",v)
131+
self.jobs[job_attrs.attributes['encoded_id']] = job_attrs.attributes
118132
self.declare_process(job_attrs.attributes)
119133

134+
# print(self.jobs[0]['inputs'])
135+
120136
def __str__(self) -> str:
121137
"""Represent this Provenance profile as a string."""
122138
return "ProvenanceProfile <{}>".format(
@@ -223,7 +239,8 @@ def declare_process(
223239
# cmd = ga_export_jobs_attrs["command_line"]
224240
process_name = ga_export_jobs_attrs["tool_id"]
225241
# tool_version = ga_export_jobs_attrs["tool_version"]
226-
prov_label = "Run of ga_export/jobs_attrs.txt#" + process_name
242+
# TODO: insert workflow id
243+
prov_label = "Run of workflow_id_placeholder" + process_name
227244
start_time = ga_export_jobs_attrs["create_time"]
228245
end_time = ga_export_jobs_attrs["update_time"]
229246

tools/load_ga_export.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,21 @@ def parse_ga_jobs_attrs(self, job_attrs):
4848

4949
class GalaxyDataset(Dict):
5050

51-
def __init__(self, ga_export_dataset_attrs):
51+
def __init__(self):
5252
"""
5353
Initialize the GalaxyDataset object.
5454
"""
5555
self.attributes = {}
5656
self.attributes["metadata"] = {}
5757

58-
def parse_ga_dataset_attrs(self, job_attrs):
58+
def parse_ga_dataset_attrs(self, dataset_attrs):
5959

60-
for key, value in job_attrs.items():
60+
for key, value in dataset_attrs.items():
6161
if not isinstance(value, dict):
6262
self.attributes[key] = value
6363
else:
6464
if len(value) == 0:
6565
pass
6666
else:
6767
if "metadata" in key:
68-
self.attributes["metadata"].update(job_attrs[key])
68+
self.attributes["metadata"].update(dataset_attrs[key])

0 commit comments

Comments
 (0)