Rename timing to time and tighten elapsed-time parsing, per code review

mitzimorris · mitzimorris · commit 41f963337046 · 2025-02-18T13:21:16.000-05:00
diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py
@@ -97,7 +97,7 @@ def __init__(
         self._max_treedepths: np.ndarray = np.zeros(
             self.runset.chains, dtype=int
         )
-        self._chain_timing: List[Optional[Dict[str, float]]] = []
+        self._chain_time: List[Dict[str, float]] = []
 
         # info from CSV header and initial and final comment blocks
         config = self._validate_csv_files()
@@ -242,12 +242,12 @@ def max_treedepths(self) -> Optional[np.ndarray]:
         return self._max_treedepths if not self._is_fixed_param else None
 
     @property
-    def timing(self) -> List[Optional[Dict[str, float]]]:
+    def time(self) -> List[Dict[str, float]]:
         """
-        List of per-chain timing info scraped from CSV file.
+        List of per-chain time info scraped from CSV file.
         Each chain has dict with keys "warmup", "sampling", "total".
         """
-        return self._chain_timing
+        return self._chain_time
 
     def draws(
         self, *, inc_warmup: bool = False, concat_chains: bool = False
@@ -310,7 +310,7 @@ def _validate_csv_files(self) -> Dict[str, Any]:
                     save_warmup=self._save_warmup,
                     thin=self._thin,
                 )
-                self._chain_timing.append(dzero.get("timing"))
+                self._chain_time.append(dzero.get("time"))
                 if not self._is_fixed_param:
                     self._divergences[i] = dzero['ct_divergences']
                     self._max_treedepths[i] = dzero['ct_max_treedepth']
@@ -323,7 +323,7 @@ def _validate_csv_files(self) -> Dict[str, Any]:
                     save_warmup=self._save_warmup,
                     thin=self._thin,
                 )
-                self._chain_timing.append(drest.get("timing"))
+                self._chain_time.append(drest.get("time"))
                 for key in dzero:
                     # check args that matter for parsing, plus name, version
                     if (
diff --git a/cmdstanpy/utils/stancsv.py b/cmdstanpy/utils/stancsv.py
@@ -79,7 +79,7 @@ def scan_sampler_csv(path: str, is_fixed_param: bool = False) -> Dict[str, Any]:
                 lineno = scan_warmup_iters(fd, dict, lineno)
                 lineno = scan_hmc_params(fd, dict, lineno)
             lineno = scan_sampling_iters(fd, dict, lineno, is_fixed_param)
-            lineno = scan_timing(fd, dict, lineno)
+            lineno = scan_time(fd, dict, lineno)
         except ValueError as e:
             raise ValueError("Error in reading csv file: " + path) from e
     return dict
@@ -381,24 +381,24 @@ def scan_sampling_iters(
         config_dict['ct_max_treedepth'] = ct_max_treedepth
     return lineno
 
-def scan_timing(fd: TextIO, config_dict: Dict[str, Any], lineno: int) -> int:
+def scan_time(fd: TextIO, config_dict: Dict[str, Any], lineno: int) -> int:
     """
-    Scan timing information from the trailing comment lines in a Stan CSV file.
+    Scan time information from the trailing comment lines in a Stan CSV file.
 
     #  Elapsed Time: 0.001332 seconds (Warm-up)
     #                0.000249 seconds (Sampling)
     #                0.001581 seconds (Total)
 
 
-    It extracts the time values and saves them in the config_dict under the key 'timing'
+    It extracts the time values and saves them in the config_dict under the key 'time'
     as a dictionary with keys 'warmup', 'sampling', and 'total'.
-    Returns the updated line number after reading the timing info.
+    Returns the updated line number after reading the time info.
 
-    :param fd: Open file descriptor positioned at the timing section.
-    :param config_dict: Dictionary to which the timing info is added.
+    :param fd: Open file descriptor at comment row following all sample data.
+    :param config_dict: Dictionary to which the time info is added.
     :param lineno: Current line number
     """
-    timing = {}
+    time = {}
     keys = ['warmup', 'sampling', 'total']
     while True:
         pos = fd.tell()
@@ -414,27 +414,29 @@ def scan_timing(fd: TextIO, config_dict: Dict[str, Any], lineno: int) -> int:
         content = stripped.lstrip('#').strip()
         if not content:
             continue
-        tokens = content.lower().split()
-        if 'elapsed' in tokens[0]:
+        tokens = content.split()
+        if len(tokens) < 3:
+            raise ValueError(f"Invalid time at line {lineno}: {content}")
+        if 'Warm-up' in content:
             key = 'warmup'
-            try:
-                t = float(tokens[2])
-            except ValueError:
-                raise ValueError(f"Invalid timing value at line {lineno}: {content}")
+            time_str = tokens[2]
+        elif 'Sampling' in content:
+            key = 'sampling'
+            time_str = tokens[0]
+        elif  'Total' in content:
+            key = 'total'
+            time_str = tokens[0]
         else:
-            if 'sampling' in tokens[2]:
-                key = 'sampling'
-            elif 'total' in tokens[2]:
-                key = 'total'
-            try:
-                t = float(tokens[0])
-            except ValueError:
-                raise ValueError(f"Invalid timing value at line {lineno}: {content}")
-        timing[key] = t
-    if not all(key in timing for key in keys):
-        raise ValueError(f"Invalid timing, stopped at {lineno}")
+            raise ValueError(f"Invalid time at line {lineno}: {content}")
+        try:
+            t = float(time_str)
+        except ValueError:
+            raise ValueError(f"Invalid time value at line {lineno}: {content}")
+        time[key] = t
+    if not all(key in time for key in keys):
+        raise ValueError(f"Invalid time, stopped at {lineno}")
         
-    config_dict['timing'] = timing
+    config_dict['time'] = time
     return lineno
 
 
diff --git a/test/test_sample.py b/test/test_sample.py
@@ -1714,11 +1714,11 @@ def test_metadata() -> None:
     assert fit.column_names == col_names
     assert fit.metric_type == 'diag_e'
 
-    assert len(fit.timing) == 4
+    assert len(fit.time) == 4
     for i in range(4):
-        assert 'warmup' in fit.timing[i].keys()
-        assert 'sampling' in fit.timing[i].keys()
-        assert 'total' in fit.timing[i].keys()
+        assert 'warmup' in fit.time[i].keys()
+        assert 'sampling' in fit.time[i].keys()
+        assert 'total' in fit.time[i].keys()
 
     assert fit.metadata.cmdstan_config['num_samples'] == 100
     assert fit.metadata.cmdstan_config['thin'] == 1
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -700,7 +700,8 @@ def test_munge_varnames() -> None:
     var = 'y.2.3:1.2:5:6'
     assert stancsv.munge_varname(var) == 'y[2,3].1[2].5.6'
 
-def test_scan_timing_normal() -> None:
+
+def test_scan_time_normal() -> None:
     csv_content = (
         "# Elapsed Time: 0.005 seconds (Warm-up)\n"
         "#                0 seconds (Sampling)\n"
@@ -709,12 +710,13 @@ def test_scan_timing_normal() -> None:
     fd = io.StringIO(csv_content)
     config_dict = {}
     start_line = 0
-    final_line = stancsv.scan_timing(fd, config_dict, start_line)
+    final_line = stancsv.scan_time(fd, config_dict, start_line)
     assert final_line == 3
     expected = {'warmup': 0.005, 'sampling': 0.0, 'total': 0.005}
-    assert config_dict.get('timing') == expected
+    assert config_dict.get('time') == expected
+
 
-def test_scan_timing_no_timing() -> None:
+def test_scan_time_no_timing() -> None:
     csv_content = (
         "# merrily we roll along\n"
         "# roll along\n"
@@ -723,11 +725,11 @@ def test_scan_timing_no_timing() -> None:
     fd = io.StringIO(csv_content)
     config_dict = {}
     start_line = 0
-    with pytest.raises(ValueError, match="Invalid timing"):
-        stancsv.scan_timing(fd, config_dict, start_line)
+    with pytest.raises(ValueError, match="Invalid time"):
+        stancsv.scan_time(fd, config_dict, start_line)
 
 
-def test_scan_timing_invalid_value() -> None:
+def test_scan_time_invalid_value() -> None:
     csv_content = (
         "# Elapsed Time: abc seconds (Warm-up)\n"
         "#                0.200 seconds (Sampling)\n"
@@ -736,5 +738,19 @@ def test_scan_timing_invalid_value() -> None:
     fd = io.StringIO(csv_content)
     config_dict = {}
     start_line = 0
-    with pytest.raises(ValueError, match="Invalid timing"):
-        stancsv.scan_timing(fd, config_dict, start_line)
+    with pytest.raises(ValueError, match="Invalid time"):
+        stancsv.scan_time(fd, config_dict, start_line)
+
+
+def test_scan_time_invalid_string() -> None:
+    csv_content = (
+        "# Elapsed Time: 0.22 seconds (foo)\n"
+        "#                0.200 seconds (Sampling)\n"
+        "#                0.300 seconds (Total)\n"
+    )
+    fd = io.StringIO(csv_content)
+    config_dict = {}
+    start_line = 0
+    with pytest.raises(ValueError, match="Invalid time"):
+        stancsv.scan_time(fd, config_dict, start_line)
+