Skip to content

Commit fdad618

Browse files
committed
Merge branch 'dev' of github.com:biocore/qiita
2 parents d8cb8db + ba13403 commit fdad618

File tree

16 files changed

+249
-151
lines changed

16 files changed

+249
-151
lines changed

CHANGELOG.md

+16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,21 @@
11
# Qiita changelog
22

3+
Version 2025.02
4+
---------------
5+
6+
Deployed on February 24th, 2025
7+
8+
* Replaced `os.rename` with `shutil.move` in the code to fix [#3455](https://github.com/qiita-spots/qiita/issues/3455).
9+
* Via qp-spades, replaced the legacy `spades` command with `cloudSPAdes` for TellSeq.
10+
* `FASTA_preprocessed` within qtp-sequencing now allows results to be named using their sample-name, in addition to their run-prefix.
11+
* `Remove SynDNA inserts & plasmid reads` superseded `Remove SynDNA reads`; the command now removes SynDNA inserts and plasmids.
12+
* `update_resource_allocation_redis` now relies on equations stored in the database instead of hardcoded ones; thank you @Gossty!
13+
* SPP: Updated prep-info file generation to identify and report filtered fastq files that could not be matched to a sample-id instead of silently ignoring them.
14+
* SPP: Removed legacy test code and example files for amplicon processing. Some other tests were updated and repurposed.
15+
* SPP: jobs are now easier to restart.
16+
* SPP: MultiQC report generation is now a separate Slurm job and uses jinja2 templates; FastQC also uses jinja2 templates.
17+
18+
319
Version 2025.01
420
---------------
521

qiita_core/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
# The full license is in the file LICENSE, distributed with this software.
77
# -----------------------------------------------------------------------------
88

9-
__version__ = "2025.01"
9+
__version__ = "2025.02"

qiita_db/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from . import user
2828
from . import processing_job
2929

30-
__version__ = "2025.01"
30+
__version__ = "2025.02"
3131

3232
__all__ = ["analysis", "artifact", "archive", "base", "commands",
3333
"environment_manager", "exceptions", "investigation", "logger",

qiita_db/meta_util.py

+17-19
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
#
2323
# The full license is in the file LICENSE, distributed with this software.
2424
# -----------------------------------------------------------------------------
25-
from os import stat, rename
25+
from os import stat
26+
from shutil import move
2627
from os.path import join, relpath, basename
2728
from time import strftime, localtime
2829
import matplotlib.pyplot as plt
@@ -48,6 +49,8 @@
4849
"sName", "sVersion", "cID", "cName", "processing_job_id",
4950
"parameters", "samples", "columns", "input_size", "extra_info",
5051
"MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"]
52+
RAW_DATA_ARTIFACT_TYPE = {
53+
'SFF', 'FASTQ', 'FASTA', 'FASTA_Sanger', 'per_sample_FASTQ'}
5154

5255

5356
def _get_data_fpids(constructor, object_id):
@@ -118,9 +121,7 @@ def validate_filepath_access_by_user(user, filepath_id):
118121

119122
if artifact.visibility == 'public':
120123
# TODO: https://github.com/biocore/qiita/issues/1724
121-
if artifact.artifact_type in ['SFF', 'FASTQ', 'FASTA',
122-
'FASTA_Sanger',
123-
'per_sample_FASTQ']:
124+
if artifact.artifact_type in RAW_DATA_ARTIFACT_TYPE:
124125
study = artifact.study
125126
has_access = study.has_access(user, no_public=True)
126127
if (not study.public_raw_download and not has_access):
@@ -469,7 +470,7 @@ def generate_biom_and_metadata_release(study_status='public'):
469470
for c in iter(lambda: f.read(4096), b""):
470471
md5sum.update(c)
471472

472-
rename(tgz_name, tgz_name_final)
473+
move(tgz_name, tgz_name_final)
473474

474475
vals = [
475476
('filepath', tgz_name_final[len(working_dir):], r_client.set),
@@ -543,7 +544,7 @@ def generate_plugin_releases():
543544
md5sum = md5()
544545
for c in iter(lambda: f.read(4096), b""):
545546
md5sum.update(c)
546-
rename(tgz_name, tgz_name_final)
547+
move(tgz_name, tgz_name_final)
547548
vals = [
548549
('filepath', tgz_name_final[len(working_dir):], r_client.set),
549550
('md5sum', md5sum.hexdigest(), r_client.set),
@@ -593,7 +594,7 @@ def update_resource_allocation_redis(active=True):
593594
if len(df) == 0:
594595
continue
595596

596-
fig, axs = resource_allocation_plot(df, cname, sname, col_name)
597+
fig, axs = resource_allocation_plot(df, col_name)
597598
titles = [0, 0]
598599
images = [0, 0]
599600

@@ -605,21 +606,18 @@ def update_resource_allocation_redis(active=True):
605606
# only time
606607
new_fig = plt.figure()
607608
new_ax = new_fig.add_subplot(111)
608-
609-
scatter_data = ax.collections[0]
610-
new_ax.scatter(scatter_data.get_offsets()[:, 0],
611-
scatter_data.get_offsets()[:, 1],
612-
s=scatter_data.get_sizes(), label="data")
613-
614609
line = ax.lines[0]
615610
new_ax.plot(line.get_xdata(), line.get_ydata(),
616611
linewidth=1, color='orange')
617-
618-
if len(ax.collections) > 1:
619-
failure_data = ax.collections[1]
620-
new_ax.scatter(failure_data.get_offsets()[:, 0],
621-
failure_data.get_offsets()[:, 1],
622-
color='red', s=3, label="failures")
612+
handles, labels = ax.get_legend_handles_labels()
613+
for handle, label, scatter_data in zip(handles,
614+
labels,
615+
ax.collections):
616+
color = handle.get_facecolor()
617+
new_ax.scatter(scatter_data.get_offsets()[:, 0],
618+
scatter_data.get_offsets()[:, 1],
619+
s=scatter_data.get_sizes(), label=label,
620+
color=color)
623621

624622
new_ax.set_xscale('log')
625623
new_ax.set_yscale('log')

qiita_db/support_files/patches/94.sql

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Jan 13, 2025
2+
-- Adding a table for formulas for resource allocations
3+
CREATE TABLE qiita.allocation_equations (
4+
equation_id SERIAL PRIMARY KEY,
5+
equation_name TEXT NOT NULL,
6+
expression TEXT NOT NULL
7+
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
INSERT INTO qiita.allocation_equations(equation_name, expression)
2+
VALUES
3+
('mem_model1', '(k * (np.log(x))) + (x * a) + b'),
4+
('mem_model2', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + a'),
5+
('mem_model3', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**3))'),
6+
('mem_model4', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**2.5))'),
7+
('time_model1', 'a + b + ((np.log(x)) * k)'),
8+
('time_model2', 'a + (b * x) + ((np.log(x)) * k)'),
9+
('time_model3', 'a + (b * ((np.log(x))**2)) + ((np.log(x)) * k)'),
10+
('time_model4', '(a * ((np.log(x))**3)) + (b * ((np.log(x))**2)) + ((np.log(x)) * k)');

qiita_db/support_files/populate_test_db.sql

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ INSERT INTO qiita.artifact_type VALUES (5, 'per_sample_FASTQ', NULL, true, false
8888
INSERT INTO qiita.artifact_type VALUES (7, 'BIOM', 'BIOM table', false, false, true);
8989

9090

91+
9192
--
9293
-- Data for Name: data_type; Type: TABLE DATA; Schema: qiita; Owner: antoniog
9394
--

qiita_db/test/test_meta_util.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -529,16 +529,19 @@ def test_update_resource_allocation_redis(self):
529529
cname, sname, version, col_name, 'title_mem')
530530
title_mem = str(r_client.get(title_mem_str))
531531
self.assertTrue(
532-
"model: "
533-
"k * log(x) + "
534-
"b * log(x)^2 + "
535-
"a * log(x)^3" in title_mem
532+
"model: (k * (np.log(x))) + "
533+
"(b * ((np.log(x))**2)) + "
534+
"(a * ((np.log(x))**2.5))" in title_mem
536535
)
537536

538537
title_time_str = 'resources$#%s$#%s$#%s$#%s:%s' % (
539538
cname, sname, version, col_name, 'title_time')
540539
title_time = str(r_client.get(title_time_str))
541-
self.assertTrue("model: a + b + log(x) * k" in title_time)
540+
self.assertTrue(
541+
"model: (a * ((np.log(x))**3)) + "
542+
"(b * ((np.log(x))**2)) + "
543+
"((np.log(x)) * k)" in title_time
544+
)
542545

543546

544547
if __name__ == '__main__':

qiita_db/test/test_util.py

+22-23
Original file line numberDiff line numberDiff line change
@@ -1329,8 +1329,7 @@ def setUp(self):
13291329

13301330
def test_plot_return(self):
13311331
# check the plot returns correct objects
1332-
fig1, axs1 = qdb.util.resource_allocation_plot(
1333-
self.df, self.cname, self.sname, self.col_name)
1332+
fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name)
13341333
self.assertIsInstance(
13351334
fig1, Figure,
13361335
"Returned object fig1 is not a Matplotlib Figure")
@@ -1346,46 +1345,46 @@ def test_minimize_const(self):
13461345
self.df[self.col_name] = self.df.samples * self.df['columns']
13471346
fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
13481347

1349-
bm, options = qdb.util._resource_allocation_plot_helper(
1350-
self.df, axs[0], self.cname, self.sname, 'MaxRSSRaw',
1351-
qdb.util.MODELS_MEM, self.col_name)
1348+
mem_models, time_models = qdb.util.retrieve_equations()
1349+
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
1350+
self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
13521351
# check that the algorithm chooses correct model for MaxRSSRaw and
13531352
# has 0 failures
13541353
k, a, b = options.x
1355-
failures_df = qdb.util._resource_allocation_failures(
1356-
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
1354+
failures_df = qdb.util._resource_allocation_success_failures(
1355+
self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1]
13571356
failures = failures_df.shape[0]
1358-
self.assertEqual(bm, qdb.util.mem_model3,
1357+
1358+
self.assertEqual(bm_name, 'mem_model4',
1359+
msg=f"""Best memory model
1360+
doesn't match
1361+
{bm_name} != 'mem_model4'""")
1362+
self.assertEqual(bm, mem_models['mem_model4']['equation'],
13591363
msg=f"""Best memory model
13601364
doesn't match
13611365
Coefficients:{k} {a} {b}
1362-
{qdb.util.mem_model1}, "qdb.util.mem_model1"
1363-
{qdb.util.mem_model2}, "qdb.util.mem_model2"
1364-
{qdb.util.mem_model3}, "qdb.util.mem_model3"
1365-
{qdb.util.mem_model4}, "qdb.util.mem_model4"
13661366
""")
13671367
self.assertEqual(failures, 0, "Number of failures must be 0")
13681368

13691369
# check that the algorithm chooses correct model for ElapsedRaw and
13701370
# has 1 failure
1371-
bm, options = qdb.util._resource_allocation_plot_helper(
1372-
self.df, axs[1], self.cname, self.sname, 'ElapsedRaw',
1373-
qdb.util.MODELS_TIME, self.col_name)
1371+
bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
1372+
self.df, axs[1], 'ElapsedRaw', time_models, self.col_name)
13741373
k, a, b = options.x
1375-
failures_df = qdb.util._resource_allocation_failures(
1376-
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')
1374+
failures_df = qdb.util._resource_allocation_success_failures(
1375+
self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1]
13771376
failures = failures_df.shape[0]
1377+
self.assertEqual(bm_name, 'time_model4',
1378+
msg=f"""Best time model
1379+
doesn't match
1380+
{bm_name} != 'time_model4'""")
13781381

1379-
self.assertEqual(bm, qdb.util.time_model1,
1382+
self.assertEqual(bm, time_models[bm_name]['equation'],
13801383
msg=f"""Best time model
13811384
doesn't match
13821385
Coefficients:{k} {a} {b}
1383-
{qdb.util.time_model1}, "qdb.util.time_model1"
1384-
{qdb.util.time_model2}, "qdb.util.time_model2"
1385-
{qdb.util.time_model3}, "qdb.util.time_model3"
1386-
{qdb.util.time_model4}, "qdb.util.time_model4"
13871386
""")
1388-
self.assertEqual(failures, 1, "Number of failures must be 1")
1387+
self.assertEqual(failures, 0, "Number of failures must be 0")
13891388

13901389
def test_MaxRSS_helper(self):
13911390
tests = [

0 commit comments

Comments
 (0)