Skip to content

Commit 9ec2ce5

Browse files
antgonza and wasade
authored
has human check (#3284)
* has human check * pandas<2.0 * Artifact.has_human * adding test and using SQL * bla -> oral * decorate download.py * Apply suggestions from code review Co-authored-by: Daniel McDonald <[email protected]> --------- Co-authored-by: Daniel McDonald <[email protected]>
1 parent b8e151e commit 9ec2ce5

File tree

6 files changed

+82
-7
lines changed

6 files changed

+82
-7
lines changed

qiita_db/analysis.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def create(cls, owner, name, description, from_default=False,
145145
If the duplicated sample ids in the selected studies should be
146146
merged or prepended with the artifact ids. False (default) prepends
147147
the artifact id
148-
categories : set of str, optional
148+
categories : list of str, optional
149149
If not None, use _only_ these categories for the metaanalysis
150150
151151
Returns

qiita_db/artifact.py

+22
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class Artifact(qdb.base.QiitaObject):
4343
prep_template
4444
ebi_run_accession
4545
study
46+
has_human
4647
4748
Methods
4849
-------
@@ -1550,6 +1551,27 @@ def being_deleted_by(self):
15501551
res = qdb.sql_connection.TRN.execute_fetchindex()
15511552
return qdb.processing_job.ProcessingJob(res[0][0]) if res else None
15521553

1554+
@property
def has_human(self):
    """Whether the artifact may hold sequences from human samples

    Only ``per_sample_FASTQ`` artifacts are inspected; any other artifact
    type is reported as not having human sequences. For those artifacts,
    the ``env_package`` value of every sample linked to the artifact via
    its preparation is checked, and a value starting with ``human-`` flags
    the artifact.

    Returns
    -------
    bool
        True if at least one sample of this artifact has an
        ``env_package`` that starts with ``human-``; False otherwise
    """
    if self.artifact_type != 'per_sample_FASTQ':
        return False

    # without a study there is no sample template to look at
    study = self.study
    if study is None:
        return False

    st = study.sample_template
    if 'env_package' not in st.categories:
        return False

    # the sample table name is per-study, so it has to be formatted in
    sql = f"""SELECT DISTINCT sample_values->>'env_package'
              FROM qiita.sample_{st.id} WHERE sample_id in (
                  SELECT sample_id from qiita.preparation_artifact
                  LEFT JOIN qiita.prep_template_sample USING (
                      prep_template_id)
                  WHERE artifact_id = {self.id})"""
    with qdb.sql_connection.TRN:
        qdb.sql_connection.TRN.add(sql)
        # ->> yields NULL (None) for samples missing the key; skip those
        # instead of failing on None.startswith
        return any(
            v is not None and v.startswith('human-')
            for v in qdb.sql_connection.TRN.execute_fetchflatten())
1574+
15531575
def jobs(self, cmd=None, status=None, show_hidden=False):
15541576
"""Jobs that used this artifact as input
15551577

qiita_db/test/test_artifact.py

+42
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,28 @@ def setUp(self):
677677

678678
self._clean_up_files = [self.fp1, self.fp2, self.fp3, self.fp4]
679679

680+
# per_sample_FASTQ Metagenomic example
681+
682+
self.prep_template_per_sample_fastq = \
683+
qdb.metadata_template.prep_template.PrepTemplate.create(
684+
metadata, qdb.study.Study(1), "Metagenomic")
685+
fd, self.fwd = mkstemp(prefix='SKB8.640193', suffix='_R1.fastq')
686+
close(fd)
687+
with open(self.fwd, 'w') as f:
688+
f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
689+
"NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
690+
"+\n"
691+
"#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")
692+
fd, self.rev = mkstemp(prefix='SKB8.640193', suffix='_R2.fastq')
693+
close(fd)
694+
with open(self.rev, 'w') as f:
695+
f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
696+
"NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
697+
"+\n"
698+
"#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")
699+
700+
self._clean_up_files.extend([self.fwd, self.rev])
701+
680702
def tearDown(self):
681703
for f in self._clean_up_files:
682704
if exists(f):
@@ -1364,6 +1386,26 @@ def test_descendants_with_jobs_one_element(self):
13641386
exp = [('artifact', artifact)]
13651387
self.assertCountEqual(obs, exp)
13661388

1389+
def test_has_human(self):
    """has_human flips to True only once a sample is tagged human-*"""
    # artifact 1 is a FASTQ artifact, so it must report False
    fastq_artifact = qdb.artifact.Artifact(1)
    self.assertFalse(fastq_artifact.has_human)

    # build a per_sample_FASTQ artifact from the forward/reverse files
    filepaths = [(self.fwd, 1), (self.rev, 2)]
    per_sample = qdb.artifact.Artifact.create(
        filepaths, "per_sample_FASTQ",
        prep_template=self.prep_template_per_sample_fastq)

    # none of its samples are human yet, so it is still False
    self.assertFalse(per_sample.has_human)

    # mark the sample as human-* in the sample template ...
    human_metadata = {'1.SKB8.640193': {'env_package': 'human-oral'}}
    update_df = pd.DataFrame.from_dict(
        human_metadata, orient='index', dtype=str)
    per_sample.study.sample_template.update(update_df)

    # ... and now the same artifact must report True
    self.assertTrue(per_sample.has_human)
1408+
13671409

13681410
@qiita_test_checker()
13691411
class ArtifactArchiveTests(TestCase):

qiita_pet/handlers/artifact_handlers/base_handlers.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,15 @@ def check_artifact_access(user, artifact):
4343
"""
4444
if user.level in ('admin', 'wet-lab admin'):
4545
return
46-
if artifact.visibility != 'public':
47-
study = artifact.study
46+
47+
study = artifact.study
48+
if artifact.visibility == 'public':
49+
# if it's public we need to confirm that this artifact has no possible
50+
# human sequences
51+
if artifact.has_human and not study.has_access(user, True):
52+
raise QiitaHTTPError(403, "Access denied to artifact %s"
53+
% artifact.id)
54+
else:
4855
analysis = artifact.analysis
4956
if study:
5057
if not study.has_access(user):

qiita_pet/handlers/download.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def get(self, study_id):
235235
(self.current_user in study.shared_with)))
236236

237237
for a in study.artifacts(artifact_type='BIOM'):
238-
if full_access or a.visibility == 'public':
238+
if full_access or (a.visibility == 'public' and not a.has_human):
239239
to_download.extend(self._list_artifact_files_nginx(a))
240240

241241
self._write_nginx_file_list(to_download)
@@ -289,7 +289,7 @@ def get(self, study_id):
289289
to_download = []
290290
for a in study.artifacts():
291291
if not a.parents:
292-
if not is_owner and a.visibility != 'public':
292+
if not is_owner and (a.visibility != 'public' or a.has_human):
293293
continue
294294
to_download.extend(self._list_artifact_files_nginx(a))
295295

@@ -460,7 +460,7 @@ def get(self):
460460
artifacts = study.artifacts(
461461
dtype=data_type, artifact_type='BIOM')
462462
for a in artifacts:
463-
if a.visibility != 'public':
463+
if a.visibility != 'public' or a.has_human:
464464
continue
465465
to_download.extend(self._list_artifact_files_nginx(a))
466466

@@ -498,6 +498,10 @@ def get(self):
498498
raise HTTPError(404, reason='Artifact is not public. If '
499499
'this is a mistake contact: '
500500
501+
elif artifact.has_human:
502+
raise HTTPError(404, reason='Artifact has possible human '
503+
'sequences. If this is a mistake contact: '
504+
501505
else:
502506
to_download = self._list_artifact_files_nginx(artifact)
503507
if not to_download:

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
scripts=glob('scripts/*'),
103103
# making sure that numpy is installed before biom
104104
setup_requires=['numpy', 'cython'],
105-
install_requires=['psycopg2', 'click', 'bcrypt', 'pandas',
105+
install_requires=['psycopg2', 'click', 'bcrypt', 'pandas<2.0',
106106
'biom-format', 'tornado<6.0', 'toredis', 'redis',
107107
'scp', 'pyparsing', 'h5py', 'natsort', 'nose', 'pep8',
108108
'networkx', 'humanize', 'wtforms<3.0.0', 'nltk',

0 commit comments

Comments
 (0)