Improve error handling for invalid eval results in model cards (#3000)

hanouticelina · web-flow · commit 2cef17b7913d · 2025-04-16T14:45:04.000+02:00
* log a warning instead of raising on invalid eval results when `ignore_metadata_errors` is true

* improve

* better

* move eval results validation logic into a separate function (`_validate_eval_results`)

* fix typing
diff --git a/src/huggingface_hub/repocard_data.py b/src/huggingface_hub/repocard_data.py
@@ -245,6 +245,23 @@ def __len__(self) -> int:
         return len(self.__dict__)
 
 
+def _validate_eval_results(
+    eval_results: Optional[Union[EvalResult, List[EvalResult]]],
+    model_name: Optional[str],
+) -> List[EvalResult]:
+    if eval_results is None:
+        return []
+    if isinstance(eval_results, EvalResult):
+        eval_results = [eval_results]
+    if not isinstance(eval_results, list) or not all(isinstance(r, EvalResult) for r in eval_results):
+        raise ValueError(
+            f"`eval_results` should be of type `EvalResult` or a list of `EvalResult`, got {type(eval_results)}."
+        )
+    if model_name is None:
+        raise ValueError("Passing `eval_results` requires `model_name` to be set.")
+    return eval_results
+
+
 class ModelCardData(CardData):
     """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
@@ -359,10 +376,13 @@ def __init__(
         super().__init__(**kwargs)
 
         if self.eval_results:
-            if isinstance(self.eval_results, EvalResult):
-                self.eval_results = [self.eval_results]
-            if self.model_name is None:
-                raise ValueError("Passing `eval_results` requires `model_name` to be set.")
+            try:
+                self.eval_results = _validate_eval_results(self.eval_results, self.model_name)
+            except Exception as e:
+                if ignore_metadata_errors:
+                    logger.warning(f"Failed to validate eval_results: {e}. Not loading eval results into CardData.")
+                else:
+                    raise ValueError(f"Failed to validate eval_results: {e}") from e
 
     def _to_dict(self, data_dict):
         """Format the internal data dict. In this case, we convert eval results to a valid model index"""
diff --git a/tests/test_repocard_data.py b/tests/test_repocard_data.py
@@ -256,6 +256,49 @@ def test_remove_top_level_none_values(self):
         assert as_obj.pipeline_tag is None
         assert "pipeline_tag" not in as_dict  # top level none value should be removed
 
+    def test_eval_results_requires_evalresult_type(self):
+        with pytest.raises(ValueError, match="should be of type `EvalResult` or a list of `EvalResult`"):
+            ModelCardData(model_name="my-cool-model", eval_results="this is not an EvalResult")
+
+        with pytest.raises(ValueError, match="should be of type `EvalResult` or a list of `EvalResult`"):
+            ModelCardData(model_name="my-cool-model", eval_results=["accuracy: 0.9", "f1: 0.85"])
+
+        data = ModelCardData(
+            model_name="my-cool-model",
+            eval_results="this is not an EvalResult",
+            ignore_metadata_errors=True,
+        )
+        assert data.eval_results is not None and data.eval_results == "this is not an EvalResult"
+
+    def test_model_name_required_with_eval_results(self):
+        with pytest.raises(ValueError, match="`eval_results` requires `model_name` to be set"):
+            ModelCardData(
+                eval_results=[
+                    EvalResult(
+                        task_type="image-classification",
+                        dataset_type="beans",
+                        dataset_name="Beans",
+                        metric_type="acc",
+                        metric_value=0.9,
+                    ),
+                ],
+            )
+
+        eval_results = [
+            EvalResult(
+                task_type="image-classification",
+                dataset_type="beans",
+                dataset_name="Beans",
+                metric_type="acc",
+                metric_value=0.9,
+            ),
+        ]
+        data = ModelCardData(
+            eval_results=eval_results,
+            ignore_metadata_errors=True,
+        )
+        assert data.eval_results is not None and data.eval_results == eval_results
+
 
 class DatasetCardDataTest(unittest.TestCase):
     def test_train_eval_index_keys_updated(self):