Skip to content

Add Modalities, Classification endpoints #1010

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .handlers.devicehandler import DeviceHandler
from .handlers.grouphandler import GroupHandler
from .handlers.listhandler import FileListHandler, NotesListHandler, PermissionsListHandler, TagsListHandler
from .handlers.modalityhandler import ModalityHandler
from .handlers.refererhandler import AnalysesHandler
from .handlers.reporthandler import ReportHandler
from .handlers.resolvehandler import ResolveHandler
Expand Down Expand Up @@ -177,6 +178,15 @@ def prefix(path, routes):
]),


# Modalities

route( '/modalities', ModalityHandler, h='get_all', m=['GET']),
route( '/modalities', ModalityHandler, m=['POST']),
prefix('/modalities', [
route('/<modality_name:[^/]+>', ModalityHandler, m=['GET', 'PUT', 'DELETE']),
]),


# Site

route('/<cid:site>/rules', RulesHandler, m=['GET', 'POST']),
Expand Down Expand Up @@ -268,13 +278,14 @@ def prefix(path, routes):
route('/<list_name:tags>', TagsListHandler, m=['POST']),
route('/<list_name:tags>/<value:{tag}>', TagsListHandler, m=['GET', 'PUT', 'DELETE']),

route('/packfile-start', FileListHandler, h='packfile_start', m=['POST']),
route('/packfile', FileListHandler, h='packfile', m=['POST']),
route('/packfile-end', FileListHandler, h='packfile_end'),
route('/<list_name:files>', FileListHandler, m=['POST']),
route('/<list_name:files>/<name:{fname}>', FileListHandler, m=['GET', 'PUT', 'DELETE']),
route('/<list_name:files>/<name:{fname}>/info', FileListHandler, h='get_info', m=['GET']),
route('/<list_name:files>/<name:{fname}>/info', FileListHandler, h='modify_info', m=['POST']),
route('/packfile-start', FileListHandler, h='packfile_start', m=['POST']),
route('/packfile', FileListHandler, h='packfile', m=['POST']),
route('/packfile-end', FileListHandler, h='packfile_end'),
route('/<list_name:files>', FileListHandler, m=['POST']),
route('/<list_name:files>/<name:{fname}>', FileListHandler, m=['GET', 'PUT', 'DELETE']),
route('/<list_name:files>/<name:{fname}>/info', FileListHandler, h='get_info', m=['GET']),
route('/<list_name:files>/<name:{fname}>/info', FileListHandler, h='modify_info', m=['POST']),
route('/<list_name:files>/<name:{fname}>/classification', FileListHandler, h='modify_classification', m=['POST']),

route( '/<sub_cont_name:{cname}|all>/analyses', AnalysesHandler, h='get_all', m=['GET']),
route( '/analyses', AnalysesHandler, h='get_all', m=['GET']),
Expand Down
71 changes: 0 additions & 71 deletions api/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import copy
import glob
import json
import logging
import pymongo
Expand Down Expand Up @@ -132,76 +131,6 @@ def apply_env_variables(config):
# validate the lists of json schemas
schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../swagger/schemas')

expected_mongo_schemas = set([
'acquisition.json',
'analysis.json',
'collection.json',
'container.json',
'file.json',
'group.json',
'note.json',
'permission.json',
'project.json',
'session.json',
'subject.json',
'user.json',
'avatars.json',
'tag.json'
])
expected_input_schemas = set([
'acquisition.json',
'acquisition-update.json',
'analysis.json',
'analysis-job.json',
'analysis-update.json',
'avatars.json',
'collection.json',
'collection-update.json',
'device.json',
'file.json',
'file-update.json',
'group-new.json',
'group-update.json',
'info_update.json',
'note.json',
'packfile.json',
'permission.json',
'project.json',
'project-template.json',
'project-update.json',
'rule-new.json',
'rule-update.json',
'session.json',
'session-update.json',
'subject.json',
'user-new.json',
'user-update.json',
'download.json',
'tag.json',
'enginemetadata.json',
'labelupload.json',
'uidupload.json',
'uidmatchupload.json'
])
mongo_schemas = set()
input_schemas = set()

# check that the lists of schemas are correct
for schema_filepath in glob.glob(schema_path + '/mongo/*.json'):
schema_file = os.path.basename(schema_filepath)
mongo_schemas.add(schema_file)
with open(schema_filepath, 'rU') as f:
pass

assert mongo_schemas == expected_mongo_schemas, '{} is different from {}'.format(mongo_schemas, expected_mongo_schemas)

for schema_filepath in glob.glob(schema_path + '/input/*.json'):
schema_file = os.path.basename(schema_filepath)
input_schemas.add(schema_file)
with open(schema_filepath, 'rU') as f:
pass

assert input_schemas == expected_input_schemas, '{} is different from {}'.format(input_schemas, expected_input_schemas)

def create_or_recreate_ttl_index(coll_name, index_name, ttl):
if coll_name in db.collection_names():
Expand Down
12 changes: 12 additions & 0 deletions api/dao/basecontainerstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,20 @@ def update_el(self, _id, payload, unset_payload=None, recursive=False, r_payload
raise APIStorageException(e.message)
if recursive and r_payload is not None:
containerutil.propagate_changes(self.cont_name, _id, {}, {'$set': util.mongo_dict(r_payload)})

config.log.warning(update)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be removed.

return self.dbc.update_one({'_id': _id}, update)

def replace_el(self, _id, payload):
    """Replace the whole document identified by ``_id`` with ``payload``.

    The payload's ``_id`` field is forced to the (possibly ObjectId-coerced)
    container id so the replacement keeps the same identity.  Note this
    mutates the caller's ``payload`` dict.

    Raises:
        APIStorageException: if ``_id`` is not a valid ObjectId string
            while this storage uses ObjectIds.
    """
    doc_id = _id
    if self.use_object_id:
        try:
            doc_id = bson.ObjectId(_id)
        except bson.errors.InvalidId as e:
            raise APIStorageException(e.message)
    payload['_id'] = doc_id
    return self.dbc.replace_one({'_id': doc_id}, payload)


def delete_el(self, _id):
if self.use_object_id:
try:
Expand Down
10 changes: 9 additions & 1 deletion api/dao/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,15 @@ def check_req(cont, req_k, req_v):
"""
Return True if container satisfies specific requirement.
"""
cont_v = cont.get(req_k)

# If looking at classification, translate to list rather than dictionary
if req_k == 'classification':
cont_v = []
for v in cont.get('classification', {}).itervalues():
cont_v.extend(v)
else:
cont_v = cont.get(req_k)

if cont_v:
if isinstance(req_v, dict):
for k,v in req_v.iteritems():
Expand Down
85 changes: 72 additions & 13 deletions api/dao/liststorage.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import bson.errors
import bson.objectid
import copy
import datetime
import pymongo

from ..web.errors import APIStorageException, APIConflictException, APINotFoundException
from . import consistencychecker
from .. import config
from .. import util
from ..jobs import rules
from ..handlers.modalityhandler import check_and_format_classification
from .containerstorage import SessionStorage, AcquisitionStorage

log = config.log
Expand Down Expand Up @@ -115,12 +116,23 @@ def _get_el(self, _id, query_params):
if result and result.get(self.list_name):
return result.get(self.list_name)[0]

def _update_session_compliance(self, _id):
if self.cont_name in ['sessions', 'acquisitions']:
if self.cont_name == 'sessions':
session_id = _id
else:
session_id = AcquisitionStorage().get_container(_id).get('session')
SessionStorage().recalc_session_compliance(session_id)


class FileStorage(ListStorage):

def __init__(self, cont_name):
    # File lists live under the 'files' key; parent containers are
    # addressed by ObjectId.
    super(FileStorage,self).__init__(cont_name, 'files', use_object_id=True)

def _create_jobs(self, container_before):
    """Run gear rules against the container's pre/post-edit states.

    Re-fetches the container to get its post-edit state and returns
    whatever jobs ``rules.create_jobs`` spawned.
    """
    return rules.create_jobs(
        config.db,
        container_before,
        self.get_container(container_before['_id']),
        self.cont_name,
    )

def _update_el(self, _id, query_params, payload, exclude_params):
container_before = self.get_container(_id)
Expand All @@ -145,11 +157,9 @@ def _update_el(self, _id, query_params, payload, exclude_params):
'$set': mod_elem
}

container_after = self.dbc.find_one_and_update(query, update, return_document=pymongo.collection.ReturnDocument.AFTER)
if not container_after:
raise APINotFoundException('Could not find and modify {} {}. file not updated'.format(_id, self.cont_name))

jobs_spawned = rules.create_jobs(config.db, container_before, container_after, self.cont_name)
self.dbc.find_one_and_update(query, update)
self._update_session_compliance(_id)
jobs_spawned = self._create_jobs(container_before)

return {
'modified': 1,
Expand All @@ -162,12 +172,7 @@ def _delete_el(self, _id, query_params):
if f['name'] == query_params['name']:
f['deleted'] = datetime.datetime.utcnow()
result = self.dbc.update_one({'_id': _id}, {'$set': {'files': files, 'modified': datetime.datetime.utcnow()}})
if self.cont_name in ['sessions', 'acquisitions']:
if self.cont_name == 'sessions':
session_id = _id
else:
session_id = AcquisitionStorage().get_container(_id).get('session')
SessionStorage().recalc_session_compliance(session_id)
self._update_session_compliance(_id)
return result

def _get_el(self, _id, query_params):
Expand Down Expand Up @@ -215,7 +220,61 @@ def modify_info(self, _id, query_params, payload):
else:
update['$set']['modified'] = datetime.datetime.utcnow()

return self.dbc.update_one(query, update)
result = self.dbc.update_one(query, update)
self._update_session_compliance(_id)
return result


def modify_classification(self, _id, query_params, payload):
    """Add to, delete from, or replace a file's classification map.

    Args:
        _id: container id (string; coerced to ObjectId when applicable).
        query_params: file selector used in the ``$elemMatch`` query
            (e.g. ``{'name': <filename>}``).
        payload: dict with any of the keys ``add``, ``delete``,
            ``replace``; each value is a classification map validated
            against the file's modality.

    Raises:
        APIStorageException: if ``replace`` is combined with ``add`` or
            ``delete``.
    """
    container_before = self.get_container(_id)
    update = {'$set': {'modified': datetime.datetime.utcnow()}}

    if self.use_object_id:
        _id = bson.objectid.ObjectId(_id)
    query = {'_id': _id}
    query[self.list_name] = {'$elemMatch': query_params}

    # Look up the modality of the file actually being modified (the one
    # matching query_params) instead of blindly reading files[0], and
    # reuse the already-fetched container rather than querying again.
    modality = None
    for f in container_before.get(self.list_name, []):
        if all(f.get(k) == v for k, v in query_params.iteritems()):
            modality = f.get('modality')
            break

    add_payload = payload.get('add')
    delete_payload = payload.get('delete')
    replace_payload = payload.get('replace')

    if (add_payload or delete_payload) and replace_payload is not None:
        raise APIStorageException('Cannot add or delete AND replace classification fields.')

    if replace_payload is not None:
        # Full replacement of the classification map.
        replace_payload = check_and_format_classification(modality, replace_payload)

        r_update = copy.deepcopy(update)
        r_update['$set'][self.list_name + '.$.classification'] = util.mongo_sanitize_fields(replace_payload)

        self.dbc.update_one(query, r_update)

    else:
        if add_payload:
            add_payload = check_and_format_classification(modality, add_payload)

            # $addToSet with $each appends only values not already present.
            a_update = copy.deepcopy(update)
            a_update['$addToSet'] = {}
            for k, v in add_payload.iteritems():
                a_update['$addToSet'][self.list_name + '.$.classification.' + k] = {'$each': v}

            self.dbc.update_one(query, a_update)

        if delete_payload:
            delete_payload = check_and_format_classification(modality, delete_payload)

            # $pullAll is a no-op for keys/values that do not exist.
            d_update = copy.deepcopy(update)
            d_update['$pullAll'] = {}
            for k, v in delete_payload.iteritems():
                d_update['$pullAll'][self.list_name + '.$.classification.' + k] = v

            self.dbc.update_one(query, d_update)

    self._update_session_compliance(_id)
    self._create_jobs(container_before)



class StringListStorage(ListStorage):
Expand Down
3 changes: 1 addition & 2 deletions api/handlers/collectionshandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ def get_sessions(self, cid):
sessions = list(containerstorage.SessionStorage().get_all_el(query, None, projection))

self._filter_all_permissions(sessions, self.uid)
if self.is_true('measurements'):
self._add_session_measurements(sessions)

for sess in sessions:
sess = self.handle_origin(sess)
return sessions
Expand Down
17 changes: 0 additions & 17 deletions api/handlers/containerhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,6 @@ def get_all(self, cont_name, par_cont_name=None, par_id=None):
# the "count" flag add a count for each container returned
if self.is_true('counts'):
self._add_results_counts(results, cont_name)
# the "measurements" flag applies only to query for sessions
# and add a list of the measurements in the child acquisitions
if cont_name == 'sessions' and self.is_true('measurements'):
self._add_session_measurements(results)

modified_results = []
for result in results:
Expand Down Expand Up @@ -620,19 +616,6 @@ def _get_validators(self):
payload_validator = validators.from_schema_path(payload_schema_uri)
return mongo_validator, payload_validator

def _add_session_measurements(self, results):
session_measurements = config.db.acquisitions.aggregate([
{'$match': {'session': {'$in': [sess['_id'] for sess in results]}}},
{'$project': { '_id': '$session', 'files':1 }},
{'$unwind': '$files'},
{'$project': { '_id': '$_id', 'files.measurements': 1}},
{'$unwind': '$files.measurements'},
{'$group': {'_id': '$_id', 'measurements': {'$addToSet': '$files.measurements'}}}
])
session_measurements = {sess['_id']: sess['measurements'] for sess in session_measurements}
for sess in results:
sess['measurements'] = session_measurements.get(sess['_id'], None)

def _get_parent_container(self, payload):
if not self.config.get('parent_storage'):
return None, None
Expand Down
6 changes: 3 additions & 3 deletions api/handlers/dataexplorerhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,9 @@
"filter": {"term": {"container_type": "file"}},
"aggs": {

"file.measurements" : {
"file.classification" : {
"terms" : {
"field" : "file.measurements.raw",
"field" : "file.classification.raw",
"size" : 15,
"missing": "null"
}
Expand Down Expand Up @@ -278,7 +278,7 @@

SOURCE_FILE = SOURCE_ANALYSIS + [
"file.created",
"file.measurements",
"file.classification",
"file.name",
"file.size",
"file.type",
Expand Down
16 changes: 13 additions & 3 deletions api/handlers/listhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _initialize_request(self, cont_name, list_name, _id, query_params=None):
else:
permchecker = permchecker(self, container)
else:
self.abort(404, 'Element {} not found in container {}'.format(_id, storage.cont_name))
self.abort(404, 'Element {} not found in {} {}'.format(query_params.values()[0], containerutil.singularize(storage.cont_name), _id))

mongo_schema_uri = validators.schema_uri('mongo', conf.get('storage_schema_file'))
mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri)
Expand Down Expand Up @@ -539,8 +539,18 @@ def modify_info(self, cont_name, list_name, **kwargs):
# abort if the query of the update wasn't able to find any matching documents
if result.matched_count == 0:
self.abort(404, 'Element not updated in list {} of container {} {}'.format(storage.list_name, storage.cont_name, _id))
else:
return {'modified':result.modified_count}

def modify_classification(self, cont_name, list_name, **kwargs):
    """POST handler for .../files/<name>/classification.

    Validates the request body against the classification-update input
    schema, checks PUT permission on the container, then delegates the
    actual add/delete/replace to the list storage.
    """
    container_id = kwargs.pop('cid')
    permchecker, storage, _, _, _ = self._initialize_request(
        cont_name, list_name, container_id, query_params=kwargs)

    payload = self.request.json_body
    validators.validate_data(payload, 'classification-update.json', 'input', 'POST')

    permchecker(noop)('PUT', _id=container_id, query_params=kwargs, payload=payload)
    storage.modify_classification(container_id, kwargs, payload)


def post(self, cont_name, list_name, **kwargs):
_id = kwargs.pop('cid')
Expand Down
Loading