forked from G-Node/GCA-Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgca-doi
executable file
·122 lines (99 loc) · 4.18 KB
/
gca-doi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import codecs
import sys
import datetime
from lxml import etree
from lxml.builder import ElementMaker
from gca.core import Abstract
def gen_dois(abstracts, prefix):
    """Assign sequential DOIs to the abstracts and dump them as JSON.

    Every abstract gets its ``doi`` attribute set to *prefix* followed by
    its 1-based position in *abstracts*, zero-padded to at least four
    digits. The list is then serialised with ``Abstract.to_json`` and the
    UTF-8 encoded result is written to standard output.
    """
    for number, abstract in enumerate(abstracts, 1):
        abstract.doi = "%s%04d" % (prefix, number)
    sys.stdout.write(Abstract.to_json(abstracts).encode('utf-8'))
def gen_xml_md_abstract_text(abstract, do_it=True):
    """Build the DataCite ``descriptions`` element for an abstract's text.

    The result is always a list, so a caller can splice it into a parent
    element with ``*``: it holds the single ``descriptions`` element on
    success and is empty when nothing could be generated.

    :param abstract: object providing ``text`` and ``uuid`` attributes.
    :param do_it: when false, skip generation and return an empty list.
    :return: a one-element list with the ``descriptions`` element, or ``[]``.
    """
    if not do_it:
        return []

    text = abstract.text
    if text is None:
        # unicode(None, ...) raises a TypeError that the handler below does
        # not catch; an abstract without text simply gets no description.
        print('[W] text [%s]\n -> no abstract text' % abstract.uuid, file=sys.stderr)
        return []

    E = ElementMaker(namespace="http://datacite.org/schema/kernel-3",
                     nsmap={None: "http://datacite.org/schema/kernel-3"})
    try:
        # isinstance() also accepts unicode subclasses, which must not be
        # passed through unicode(..., errors=...) again.
        if not isinstance(text, unicode):
            text = unicode(text, errors='ignore')
        elms = E.descriptions(E.description(text, descriptionType='Abstract'))
    except (UnicodeEncodeError, ValueError) as e:
        print('[E] text [%s]\n -> %s' % (abstract.uuid, e), file=sys.stderr)
        return []
    # Return the wrapper inside a list: unpacking a bare element with ``*``
    # iterates its children and would drop the ``descriptions`` wrapper.
    return [elms]
def gen_xml_metadata(abstract):
    """Create the DataCite ``resource`` metadata element for one abstract.

    Reads ``doi``, ``authors`` and ``title`` from *abstract*; ``$`` signs
    are removed from the title. Publication year and issue date are taken
    from today's date. The fixed children are followed by whatever items
    iterating the result of ``gen_xml_md_abstract_text`` yields.

    :param abstract: the abstract to describe.
    :return: the ``resource`` element in the kernel-3 namespace.
    """
    doi = abstract.doi
    today = datetime.date.today()
    datacite_ns = "http://datacite.org/schema/kernel-3"
    E = ElementMaker(namespace=datacite_ns, nsmap={None: datacite_ns})

    # Children are collected in document order before the parent is built.
    children = [
        E.identifier(doi, identifierType='DOI'),
        E.creators(*[E.creator(E.creatorName(author.format_name(True)))
                     for author in abstract.authors]),
        E.titles(E.title(abstract.title.replace('$', ''))),
        E.subjects(E.subject('Bernstein Conference'),
                   E.subject('Computational Neuroscience')),
        E.publisher('G-Node'),
        E.publicationYear(str(today.year)),
        E.contributors(
            E.contributor(E.contributorName('German Neuroinformatics Node'),
                          contributorType='DataManager')),
        E.dates(E.date(today.isoformat(), dateType='Issued')),
        E.language('eng'),
        E.resourceType('Conference Abstract', resourceTypeGeneral='Text'),
    ]
    children.extend(gen_xml_md_abstract_text(abstract))
    return E.resource(*children)
def gen_xml_doi_data(abstract):
    """Build the ``DOIdata`` element (DOI, landing URL, metadata) for one abstract.

    :param abstract: object providing ``doi`` and ``uuid``, plus whatever
        ``gen_xml_metadata`` reads from it.
    :return: the ``DOIdata`` element, or ``None`` when the abstract has no
        DOI or a ``ValueError`` is raised while building the XML. Both cases
        are reported on standard error.
    """
    # Check the DOI first: nothing else is needed for an abstract without one.
    if not abstract.doi:
        sys.stderr.write('[W] %s has NO DOI\n' % abstract.uuid)
        return None

    E = ElementMaker(namespace="http://datacite.org/schema/kernel-3",
                     nsmap={None: "http://datacite.org/schema/kernel-3"})
    try:
        metadata = gen_xml_metadata(abstract)
        doc = E.DOIdata(
            E.DOI(abstract.doi),
            E.URL(u'http://abstracts.g-node.org/abstracts/%s' % abstract.uuid),
            metadata
        )
    except ValueError as e:
        # "except ... as" matches the handler style used elsewhere in this
        # file and is valid on Python 2.6+ and Python 3.
        print("[E] %s (%s)\n -> %s" % (abstract.uuid, abstract.doi, str(e)), file=sys.stderr)
        doc = None
    return doc
def gen_xml(abstracts):
    """Write the DataCite XML for all abstracts to standard output.

    Runs ``gen_xml_doi_data`` on every abstract, drops the ``None`` results,
    wraps the remaining elements in a ``resources`` root element and writes
    its pretty-printed serialisation to standard output. A summary, and a
    warning when some abstracts were dropped, goes to standard error.

    :param abstracts: sized collection of abstracts to convert.
    """
    E = ElementMaker(namespace="http://datacite.org/schema/kernel-3",
                     nsmap={None: "http://datacite.org/schema/kernel-3"})
    # Build a real list: len() is needed below, and filter() only returns a
    # list on Python 2.
    docs = [doc for doc in (gen_xml_doi_data(a) for a in abstracts)
            if doc is not None]
    root = E.resources(*docs)
    # Without an explicit encoding etree.tostring() already returns an
    # ASCII byte string, so it is written as-is; calling .encode() on it
    # only worked through Python 2's implicit ASCII coercion.
    data = etree.tostring(root, pretty_print=True)
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    getattr(sys.stdout, 'buffer', sys.stdout).write(data)
    print('%d of %d abstract\'s xml generated' % (len(docs), len(abstracts)), file=sys.stderr)
    skipped = len(abstracts) - len(docs)
    if skipped > 0:
        print('[W] %d abstract(s) not converted!!' % skipped, file=sys.stderr)
def read_abstracts(path):
    """Load abstracts from a file, or from standard input when *path* is ``'-'``.

    :param path: path of a UTF-8 encoded file, or ``'-'`` for standard input.
    :return: whatever ``Abstract.from_data`` produces for the content read.
    """
    if path == '-':
        # stdin belongs to the process: read it but leave it open.
        raw = sys.stdin.read()
    else:
        # Close the file deterministically instead of leaking the handle.
        with codecs.open(path, 'r', encoding='utf-8') as fd:
            raw = fd.read()
    return Abstract.from_data(raw)
if __name__ == '__main__':
    # Command line: FILE, then either "generate PREFIX" or "xml".
    cli = argparse.ArgumentParser(description='GCA doi - generate DOIs for abstracts')
    # NOTE(review): 'file' is a positional without nargs='?', so argparse
    # treats it as required and default='-' looks unused - confirm intent.
    cli.add_argument('file', type=str, default='-')
    commands = cli.add_subparsers(help='commands')

    # "generate": assign DOIs built from the given prefix.
    generate_cmd = commands.add_parser("generate", help='generate dois')
    generate_cmd.add_argument('prefix', type=str)
    generate_cmd.set_defaults(cmd='gen')

    # "xml": emit the XML for abstracts that already carry DOIs.
    xml_cmd = commands.add_parser("xml", help='generate xml for uploading')
    xml_cmd.set_defaults(cmd='xml')

    args = cli.parse_args()
    abs_list = read_abstracts(args.file)

    if args.cmd == 'xml':
        gen_xml(abs_list)
    elif args.cmd == 'gen':
        gen_dois(abs_list, args.prefix)