qiita-spots
diff --git a/‎CHANGELOG.md
+16 b/‎CHANGELOG.md
+16
diff --git a/‎qiita_core/__init__.py
+1-1 b/‎qiita_core/__init__.py
+1-1
diff --git a/‎qiita_db/__init__.py
+1-1 b/‎qiita_db/__init__.py
+1-1
diff --git a/‎qiita_db/meta_util.py
+17-19 b/‎qiita_db/meta_util.py
+17-19
diff --git a/‎qiita_db/support_files/patches/94.sql
+7 b/‎qiita_db/support_files/patches/94.sql
+7
diff --git a/‎qiita_db/support_files/patches/test_db_sql/94.sql
+10 b/‎qiita_db/support_files/patches/test_db_sql/94.sql
+10
diff --git a/‎qiita_db/support_files/populate_test_db.sql
+1 b/‎qiita_db/support_files/populate_test_db.sql
+1
diff --git a/‎qiita_db/test/test_meta_util.py
+8-5 b/‎qiita_db/test/test_meta_util.py
+8-5
diff --git a/‎qiita_db/test/test_util.py
+22-23 b/‎qiita_db/test/test_util.py
+22-23
@@ -1,5 +1,21 @@
 # Qiita changelog
 
+Version 2025.02
+---------------
+
+Deployed on February 24th, 2025
+
+* Replaced `os.rename` with `shutil.move` in the code to fix [#3455](https://github.com/qiita-spots/qiita/issues/3455).
+* Via qp-spades, replaced the legacy `spades` command with `cloudSPAdes` for TellSeq.
+* `FASTA_preprocessed` within qtp-sequencing now allows results to be named using their sample-name, in addition to run-prefix.
+* `Remove SynDNA inserts & plasmid reads` superseded `Remove SynDNA reads`; it now removes both SynDNA inserts and plasmids.
+* `update_resource_allocation_redis` now relies on equations stored in the database instead of hardcoded ones; thank you @Gossty!
+* SPP: Updated prep-info file generation to identify and report filtered fastq files that could not be matched to a sample-id instead of silently ignoring them.
+* SPP: Removed legacy test code and example files for amplicon processing. Some other tests updated and repurposed.
+* SPP: jobs are now easier to restart.
+* SPP: MultiQC report generation is now a separate Slurm job and uses jinja2 templates; FastQC also uses jinja2 templates.
+
+
 Version 2025.01
 ---------------
 
 
@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2025.01"
+__version__ = "2025.02"
@@ -27,7 +27,7 @@
 from . import user
 from . import processing_job
 
-__version__ = "2025.01"
+__version__ = "2025.02"
 
 __all__ = ["analysis", "artifact",  "archive", "base", "commands",
            "environment_manager", "exceptions", "investigation", "logger",
 
@@ -22,7 +22,8 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
-from os import stat, rename
+from os import stat
+from shutil import move
 from os.path import join, relpath, basename
 from time import strftime, localtime
 import matplotlib.pyplot as plt
@@ -48,6 +49,8 @@
     "sName", "sVersion", "cID", "cName", "processing_job_id",
     "parameters", "samples", "columns", "input_size", "extra_info",
     "MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"]
+RAW_DATA_ARTIFACT_TYPE = {
+        'SFF', 'FASTQ', 'FASTA', 'FASTA_Sanger', 'per_sample_FASTQ'}
 
 
 def _get_data_fpids(constructor, object_id):
@@ -118,9 +121,7 @@ def validate_filepath_access_by_user(user, filepath_id):
 
             if artifact.visibility == 'public':
                 # TODO: https://github.com/biocore/qiita/issues/1724
-                if artifact.artifact_type in ['SFF', 'FASTQ', 'FASTA',
-                                              'FASTA_Sanger',
-                                              'per_sample_FASTQ']:
+                if artifact.artifact_type in RAW_DATA_ARTIFACT_TYPE:
                     study = artifact.study
                     has_access = study.has_access(user, no_public=True)
                     if (not study.public_raw_download and not has_access):
@@ -469,7 +470,7 @@ def generate_biom_and_metadata_release(study_status='public'):
         for c in iter(lambda: f.read(4096), b""):
             md5sum.update(c)
 
-    rename(tgz_name, tgz_name_final)
+    move(tgz_name, tgz_name_final)
 
     vals = [
         ('filepath', tgz_name_final[len(working_dir):], r_client.set),
@@ -543,7 +544,7 @@ def generate_plugin_releases():
         md5sum = md5()
         for c in iter(lambda: f.read(4096), b""):
             md5sum.update(c)
-    rename(tgz_name, tgz_name_final)
+    move(tgz_name, tgz_name_final)
     vals = [
         ('filepath', tgz_name_final[len(working_dir):], r_client.set),
         ('md5sum', md5sum.hexdigest(), r_client.set),
@@ -593,7 +594,7 @@ def update_resource_allocation_redis(active=True):
                 if len(df) == 0:
                     continue
 
-                fig, axs = resource_allocation_plot(df, cname, sname, col_name)
+                fig, axs = resource_allocation_plot(df, col_name)
                 titles = [0, 0]
                 images = [0, 0]
 
@@ -605,21 +606,18 @@ def update_resource_allocation_redis(active=True):
                     # only time
                     new_fig = plt.figure()
                     new_ax = new_fig.add_subplot(111)
-
-                    scatter_data = ax.collections[0]
-                    new_ax.scatter(scatter_data.get_offsets()[:, 0],
-                                   scatter_data.get_offsets()[:, 1],
-                                   s=scatter_data.get_sizes(), label="data")
-
                     line = ax.lines[0]
                     new_ax.plot(line.get_xdata(), line.get_ydata(),
                                 linewidth=1, color='orange')
-
-                    if len(ax.collections) > 1:
-                        failure_data = ax.collections[1]
-                        new_ax.scatter(failure_data.get_offsets()[:, 0],
-                                       failure_data.get_offsets()[:, 1],
-                                       color='red', s=3, label="failures")
+                    handles, labels = ax.get_legend_handles_labels()
+                    for handle, label, scatter_data in zip(handles,
+                                                           labels,
+                                                           ax.collections):
+                        color = handle.get_facecolor()
+                        new_ax.scatter(scatter_data.get_offsets()[:, 0],
+                                       scatter_data.get_offsets()[:, 1],
+                                       s=scatter_data.get_sizes(), label=label,
+                                       color=color)
 
                     new_ax.set_xscale('log')
                     new_ax.set_yscale('log')
 
@@ -0,0 +1,7 @@
+-- Jan 13, 2025
+-- Adding a table for formulas for resource allocations
+CREATE TABLE qiita.allocation_equations (
+  equation_id     SERIAL PRIMARY KEY,
+  equation_name   TEXT NOT NULL,
+  expression      TEXT NOT NULL
+ );
@@ -0,0 +1,10 @@
+INSERT INTO qiita.allocation_equations(equation_name, expression)
+        VALUES
+        ('mem_model1', '(k * (np.log(x))) + (x * a) + b'),
+('mem_model2', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + a'),
+('mem_model3', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**3))'),
+('mem_model4', '(k * (np.log(x))) + (b * ((np.log(x))**2)) + (a * ((np.log(x))**2.5))'),
+('time_model1', 'a + b + ((np.log(x)) * k)'),
+('time_model2', 'a + (b * x) + ((np.log(x)) * k)'),
+('time_model3', 'a + (b * ((np.log(x))**2)) + ((np.log(x)) * k)'),
+('time_model4', '(a * ((np.log(x))**3)) + (b * ((np.log(x))**2)) + ((np.log(x)) * k)');
@@ -88,6 +88,7 @@ INSERT INTO qiita.artifact_type VALUES (5, 'per_sample_FASTQ', NULL, true, false
 INSERT INTO qiita.artifact_type VALUES (7, 'BIOM', 'BIOM table', false, false, true);
 
 
+
 --
 -- Data for Name: data_type; Type: TABLE DATA; Schema: qiita; Owner: antoniog
 --
 
@@ -529,16 +529,19 @@ def test_update_resource_allocation_redis(self):
             cname, sname, version, col_name, 'title_mem')
         title_mem = str(r_client.get(title_mem_str))
         self.assertTrue(
-            "model: "
-            "k * log(x) + "
-            "b * log(x)^2 + "
-            "a * log(x)^3" in title_mem
+            "model: (k * (np.log(x))) + "
+            "(b * ((np.log(x))**2)) + "
+            "(a * ((np.log(x))**2.5))" in title_mem
         )
 
         title_time_str = 'resources$#%s$#%s$#%s$#%s:%s' % (
                         cname, sname, version, col_name, 'title_time')
         title_time = str(r_client.get(title_time_str))
-        self.assertTrue("model: a + b + log(x) * k" in title_time)
+        self.assertTrue(
+            "model: (a * ((np.log(x))**3)) + "
+            "(b * ((np.log(x))**2)) + "
+            "((np.log(x)) * k)" in title_time
+        )
 
 
 if __name__ == '__main__':
 
@@ -1329,8 +1329,7 @@ def setUp(self):
 
     def test_plot_return(self):
         # check the plot returns correct objects
-        fig1, axs1 = qdb.util.resource_allocation_plot(
-            self.df, self.cname, self.sname, self.col_name)
+        fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name)
         self.assertIsInstance(
             fig1, Figure,
             "Returned object fig1 is not a Matplotlib Figure")
@@ -1346,46 +1345,46 @@ def test_minimize_const(self):
         self.df[self.col_name] = self.df.samples * self.df['columns']
         fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False)
 
-        bm, options = qdb.util._resource_allocation_plot_helper(
-            self.df, axs[0], self.cname, self.sname, 'MaxRSSRaw',
-            qdb.util.MODELS_MEM, self.col_name)
+        mem_models, time_models = qdb.util.retrieve_equations()
+        bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
+            self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name)
         # check that the algorithm chooses correct model for MaxRSSRaw and
         # has 0 failures
         k, a, b = options.x
-        failures_df = qdb.util._resource_allocation_failures(
-            self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')
+        failures_df = qdb.util._resource_allocation_success_failures(
+            self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1]
         failures = failures_df.shape[0]
-        self.assertEqual(bm, qdb.util.mem_model3,
+
+        self.assertEqual(bm_name, 'mem_model4',
+                         msg=f"""Best memory model
+                         doesn't match
+                         {bm_name} != 'mem_model4'""")
+        self.assertEqual(bm, mem_models['mem_model4']['equation'],
                          msg=f"""Best memory model
                                  doesn't match
                                  Coefficients:{k} {a} {b}
-                                 {qdb.util.mem_model1}, "qdb.util.mem_model1"
-                                 {qdb.util.mem_model2}, "qdb.util.mem_model2"
-                                 {qdb.util.mem_model3}, "qdb.util.mem_model3"
-                                 {qdb.util.mem_model4}, "qdb.util.mem_model4"
                             """)
         self.assertEqual(failures, 0, "Number of failures must be 0")
 
         # check that the algorithm chooses correct model for ElapsedRaw and
         # has 1 failure
-        bm, options = qdb.util._resource_allocation_plot_helper(
-            self.df, axs[1], self.cname, self.sname, 'ElapsedRaw',
-            qdb.util.MODELS_TIME, self.col_name)
+        bm_name, bm, options = qdb.util._resource_allocation_plot_helper(
+            self.df, axs[1], 'ElapsedRaw', time_models, self.col_name)
         k, a, b = options.x
-        failures_df = qdb.util._resource_allocation_failures(
-            self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')
+        failures_df = qdb.util._resource_allocation_success_failures(
+            self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1]
         failures = failures_df.shape[0]
+        self.assertEqual(bm_name, 'time_model4',
+                         msg=f"""Best time model
+                         doesn't match
+                         {bm_name} != 'time_model4'""")
 
-        self.assertEqual(bm, qdb.util.time_model1,
+        self.assertEqual(bm, time_models[bm_name]['equation'],
                          msg=f"""Best time model
                                 doesn't match
                                 Coefficients:{k} {a} {b}
-                                 {qdb.util.time_model1}, "qdb.util.time_model1"
-                                 {qdb.util.time_model2}, "qdb.util.time_model2"
-                                 {qdb.util.time_model3}, "qdb.util.time_model3"
-                                 {qdb.util.time_model4}, "qdb.util.time_model4"
                                 """)
-        self.assertEqual(failures, 1, "Number of failures must be 1")
+        self.assertEqual(failures, 0, "Number of failures must be 0")
 
     def test_MaxRSS_helper(self):
         tests = [