From d5f846c72a8262694a78ee2cdc21a6dadbe81d2b Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 10 May 2023 13:15:14 -0600 Subject: [PATCH 1/7] has human check --- qiita_db/analysis.py | 2 +- .../artifact_handlers/base_handlers.py | 20 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py index 2fa67acd3..850863531 100644 --- a/qiita_db/analysis.py +++ b/qiita_db/analysis.py @@ -145,7 +145,7 @@ def create(cls, owner, name, description, from_default=False, If the duplicated sample ids in the selected studies should be merged or prepended with the artifact ids. False (default) prepends the artifact id - categories : set of str, optional + categories : list of str, optional If not None, use _only_ these categories for the metaanalysis Returns diff --git a/qiita_pet/handlers/artifact_handlers/base_handlers.py b/qiita_pet/handlers/artifact_handlers/base_handlers.py index a2955f274..8f88faa72 100644 --- a/qiita_pet/handlers/artifact_handlers/base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/base_handlers.py @@ -43,8 +43,24 @@ def check_artifact_access(user, artifact): """ if user.level in ('admin', 'wet-lab admin'): return - if artifact.visibility != 'public': - study = artifact.study + + study = artifact.study + if artifact.visibility == 'public': + # if it's public we need to confirm that this artifact has no possible + # human sequences + has_human = False + if artifact.artifact_type == 'per_sample_FASTQ': + st = study.sample_template + if 'env_package' in st.categories: + asamples = {s for pt in artifact.prep_templates for s in pt} + hh = [1 for s, v in st.get_category('env_package').items() + if s in asamples and v.startswith('human-')] + if hh: + has_human = True + if has_human and not study.has_access(user): + raise QiitaHTTPError(403, "Access denied to artifact %s" + % artifact.id) + else: analysis = artifact.analysis if study: if not study.has_access(user): From 8d2cf5e68f04055627481902cfbbb5689642ed94 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 10 May 2023 13:47:54 -0600 Subject: [PATCH 2/7] pandas<2.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1ac6cc04b..961edf362 100644 --- a/setup.py +++ b/setup.py @@ -102,7 +102,7 @@ scripts=glob('scripts/*'), # making sure that numpy is installed before biom setup_requires=['numpy', 'cython'], - install_requires=['psycopg2', 'click', 'bcrypt', 'pandas', + install_requires=['psycopg2', 'click', 'bcrypt', 'pandas<2.0', 'biom-format', 'tornado<6.0', 'toredis', 'redis', 'scp', 'pyparsing', 'h5py', 'natsort', 'nose', 'pep8', 'networkx', 'humanize', 'wtforms<3.0.0', 'nltk', From 920d1714214bdc133ba88b0d6e75ccdb3ba71ec4 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 10 May 2023 14:30:31 -0600 Subject: [PATCH 3/7] Artifact.has_human --- qiita_db/artifact.py | 15 +++++++++++++++ .../handlers/artifact_handlers/base_handlers.py | 11 +---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 3a5baf5ef..6ff38639e 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -43,6 +43,7 @@ class Artifact(qdb.base.QiitaObject): prep_template ebi_run_accession study + has_human Methods ------- @@ -1550,6 +1551,20 @@ def being_deleted_by(self): res = qdb.sql_connection.TRN.execute_fetchindex() return qdb.processing_job.ProcessingJob(res[0][0]) if res else None + @property + def has_human(self): + has_human = False + if self.artifact_type == 'per_sample_FASTQ': + st = self.study.sample_template + if 'env_package' in st.categories: + asamples = {s for pt in self.prep_templates for s in pt} + for s, v in st.get_category('env_package').items(): + if s in asamples and v.startswith('human-'): + has_human = True + break + + return has_human + def jobs(self, cmd=None, status=None, show_hidden=False): """Jobs that used this artifact as input diff --git a/qiita_pet/handlers/artifact_handlers/base_handlers.py b/qiita_pet/handlers/artifact_handlers/base_handlers.py index 8f88faa72..2bf5d4e77 100644 --- a/qiita_pet/handlers/artifact_handlers/base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/base_handlers.py @@ -48,16 +48,7 @@ def check_artifact_access(user, artifact): if artifact.visibility == 'public': # if it's public we need to confirm that this artifact has no possible # human sequences - has_human = False - if artifact.artifact_type == 'per_sample_FASTQ': - st = study.sample_template - if 'env_package' in st.categories: - asamples = {s for pt in artifact.prep_templates for s in pt} - hh = [1 for s, v in st.get_category('env_package').items() - if s in asamples and v.startswith('human-')] - if hh: - has_human = True - if has_human and not study.has_access(user): + if artifact.has_human and not study.has_access(user, True): raise QiitaHTTPError(403, "Access denied to artifact %s" % artifact.id) else: From 03429ca4531762841ef564891c080bf8f90351b9 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Wed, 10 May 2023 16:08:29 -0600 Subject: [PATCH 4/7] adding test and using SQL --- qiita_db/artifact.py | 17 ++++++++++---- qiita_db/test/test_artifact.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 6ff38639e..79cf64de0 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1557,11 +1557,18 @@ def has_human(self): if self.artifact_type == 'per_sample_FASTQ': st = self.study.sample_template if 'env_package' in st.categories: - asamples = {s for pt in self.prep_templates for s in pt} - for s, v in st.get_category('env_package').items(): - if s in asamples and v.startswith('human-'): - has_human = True - break + sql = f"""SELECT DISTINCT sample_values->>'env_package' + FROM qiita.sample_{st.id} WHERE sample_id in ( + SELECT sample_id from qiita.preparation_artifact + LEFT JOIN qiita.prep_template_sample USING ( + prep_template_id) + WHERE artifact_id = {self.id})""" + with qdb.sql_connection.TRN: + qdb.sql_connection.TRN.add(sql) + for v in qdb.sql_connection.TRN.execute_fetchflatten(): + if v.startswith('human-'): + has_human = True + break return has_human diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 8ba2577d5..84c4b653c 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -677,6 +677,28 @@ def setUp(self): self._clean_up_files = [self.fp1, self.fp2, self.fp3, self.fp4] + # per_sample_FASTQ Metagenomic example + + self.prep_template_per_sample_fastq = \ + qdb.metadata_template.prep_template.PrepTemplate.create( + metadata, qdb.study.Study(1), "Metagenomic") + fd, self.fwd = mkstemp(prefix='SKB8.640193', suffix='_R1.fastq') + close(fd) + with open(self.fwd, 'w') as f: + f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") + fd, self.rev = mkstemp(prefix='SKB8.640193', suffix='_R2.fastq') + close(fd) + with open(self.rev, 'w') as f: + f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") + + self._clean_up_files.extend([self.fwd, self.rev]) + def tearDown(self): for f in self._clean_up_files: if exists(f): @@ -1364,6 +1386,26 @@ def test_descendants_with_jobs_one_element(self): exp = [('artifact', artifact)] self.assertCountEqual(obs, exp) + def test_has_human(self): + # testing a FASTQ artifact (1), should be False + self.assertFalse(qdb.artifact.Artifact(1).has_human) + + # create a per_sample_FASTQ + artifact = qdb.artifact.Artifact.create( + [(self.fwd, 1), (self.rev, 2)], "per_sample_FASTQ", + prep_template=self.prep_template_per_sample_fastq) + + # this should be False as there are no human samples + self.assertFalse(artifact.has_human) + + # let's make it True by making the samle human-* + df = pd.DataFrame.from_dict( + {'1.SKB8.640193': {'env_package': 'human-bla'}}, + orient='index', dtype=str) + artifact.study.sample_template.update(df) + + self.assertTrue(artifact.has_human) + @qiita_test_checker() class ArtifactArchiveTests(TestCase): From a8b92bdda6dae1b1fdaaa750dfbab6e0d9825bb6 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 11 May 2023 06:11:03 -0600 Subject: [PATCH 5/7] bla -> oral --- qiita_db/test/test_artifact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 84c4b653c..b9ba78149 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1400,7 +1400,7 @@ def test_has_human(self): # let's make it True by making the samle human-* df = pd.DataFrame.from_dict( - {'1.SKB8.640193': {'env_package': 'human-bla'}}, + {'1.SKB8.640193': {'env_package': 'human-oral'}}, orient='index', dtype=str) artifact.study.sample_template.update(df) From fefb57b0c0dc8f06e571a762a1c28192da795a61 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 11 May 2023 07:04:20 -0600 Subject: [PATCH 6/7] decorate download.py --- qiita_pet/handlers/download.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index ba7e54131..fc6789b83 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -235,7 +235,7 @@ def get(self, study_id): (self.current_user in study.shared_with))) for a in study.artifacts(artifact_type='BIOM'): - if full_access or a.visibility == 'public': + if full_access or a.visibility == 'public' and not a.has_human: to_download.extend(self._list_artifact_files_nginx(a)) self._write_nginx_file_list(to_download) @@ -289,7 +289,7 @@ def get(self, study_id): to_download = [] for a in study.artifacts(): if not a.parents: - if not is_owner and a.visibility != 'public': + if not is_owner and (a.visibility != 'public' or a.has_human): continue to_download.extend(self._list_artifact_files_nginx(a)) @@ -460,7 +460,7 @@ def get(self): artifacts = study.artifacts( dtype=data_type, artifact_type='BIOM') for a in artifacts: - if a.visibility != 'public': + if a.visibility != 'public' or a.has_human: continue to_download.extend(self._list_artifact_files_nginx(a)) @@ -498,6 +498,10 @@ def get(self): raise HTTPError(404, reason='Artifact is not public. If ' 'this is a mistake contact: ' 'qiita.help@gmail.com') + elif artifact.has_human: + raise HTTPError(404, reason='Artifact has possible human ' + 'sequences. If this is a mistake contact: ' + 'qiita.help@gmail.com') else: to_download = self._list_artifact_files_nginx(artifact) if not to_download: From ad459a3796332e6cbf03aecd3afd8bb5c26cbd00 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 11 May 2023 12:37:43 -0600 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Daniel McDonald --- qiita_pet/handlers/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index fc6789b83..93c81d4c0 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -235,7 +235,7 @@ def get(self, study_id): (self.current_user in study.shared_with))) for a in study.artifacts(artifact_type='BIOM'): - if full_access or a.visibility == 'public' and not a.has_human: + if full_access or (a.visibility == 'public' and not a.has_human): to_download.extend(self._list_artifact_files_nginx(a)) self._write_nginx_file_list(to_download)