Skip to content

Commit 2cef17b

Browse files
Improve error handling for invalid eval results in model cards (#3000)
* ignore eval results parsing when flag is true
* improve
* better
* move logic into a separate function
* fix typing
1 parent f3db24f commit 2cef17b

File tree

2 files changed

+67
-4
lines changed

2 files changed

+67
-4
lines changed

src/huggingface_hub/repocard_data.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,23 @@ def __len__(self) -> int:
245245
return len(self.__dict__)
246246

247247

248+
def _validate_eval_results(
    eval_results: Optional[Union[EvalResult, List[EvalResult]]],
    model_name: Optional[str],
) -> List[EvalResult]:
    """Normalize `eval_results` to a list of `EvalResult` and validate it.

    Args:
        eval_results: `None`, a single `EvalResult`, or a list of `EvalResult`.
        model_name: Name of the model the results belong to. Must be set
            whenever `eval_results` is not `None`.

    Returns:
        An empty list when `eval_results` is `None`; otherwise the results as
        a list (a single `EvalResult` is wrapped in a one-element list).

    Raises:
        ValueError: If `eval_results` is neither an `EvalResult` nor a list of
            `EvalResult`, or if `model_name` is `None`.
    """
    if eval_results is None:
        return []
    if isinstance(eval_results, EvalResult):
        eval_results = [eval_results]
    if not isinstance(eval_results, list):
        raise ValueError(
            f"`eval_results` should be of type `EvalResult` or a list of `EvalResult`, got {type(eval_results)}."
        )
    # Report the type of the offending element rather than `list`: the
    # container itself is valid, so "got <class 'list'>" would be misleading.
    for item in eval_results:
        if not isinstance(item, EvalResult):
            raise ValueError(
                "`eval_results` should be of type `EvalResult` or a list of `EvalResult`, "
                f"got a list containing {type(item)}."
            )
    if model_name is None:
        raise ValueError("Passing `eval_results` requires `model_name` to be set.")
    return eval_results
263+
264+
248265
class ModelCardData(CardData):
249266
"""Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
250267
@@ -359,10 +376,13 @@ def __init__(
359376
super().__init__(**kwargs)
360377

361378
if self.eval_results:
362-
if isinstance(self.eval_results, EvalResult):
363-
self.eval_results = [self.eval_results]
364-
if self.model_name is None:
365-
raise ValueError("Passing `eval_results` requires `model_name` to be set.")
379+
try:
380+
self.eval_results = _validate_eval_results(self.eval_results, self.model_name)
381+
except Exception as e:
382+
if ignore_metadata_errors:
383+
logger.warning(f"Failed to validate eval_results: {e}. Not loading eval results into CardData.")
384+
else:
385+
raise ValueError(f"Failed to validate eval_results: {e}") from e
366386

367387
def _to_dict(self, data_dict):
368388
"""Format the internal data dict. In this case, we convert eval results to a valid model index"""

tests/test_repocard_data.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,49 @@ def test_remove_top_level_none_values(self):
256256
assert as_obj.pipeline_tag is None
257257
assert "pipeline_tag" not in as_dict # top level none value should be removed
258258

259+
def test_eval_results_requires_evalresult_type(self):
    """Non-`EvalResult` values are rejected unless metadata errors are ignored."""
    expected_message = "should be of type `EvalResult` or a list of `EvalResult`"
    invalid_inputs = (
        "this is not an EvalResult",
        ["accuracy: 0.9", "f1: 0.85"],
    )

    # Strict mode (default): both a bare string and a list of strings raise.
    for invalid in invalid_inputs:
        with pytest.raises(ValueError, match=expected_message):
            ModelCardData(model_name="my-cool-model", eval_results=invalid)

    # Lenient mode: no exception, and the raw value is left on the object.
    data = ModelCardData(
        model_name="my-cool-model",
        eval_results="this is not an EvalResult",
        ignore_metadata_errors=True,
    )
    assert data.eval_results is not None
    assert data.eval_results == "this is not an EvalResult"
272+
273+
def test_model_name_required_with_eval_results(self):
274+
with pytest.raises(ValueError, match="`eval_results` requires `model_name` to be set"):
275+
ModelCardData(
276+
eval_results=[
277+
EvalResult(
278+
task_type="image-classification",
279+
dataset_type="beans",
280+
dataset_name="Beans",
281+
metric_type="acc",
282+
metric_value=0.9,
283+
),
284+
],
285+
)
286+
287+
eval_results = [
288+
EvalResult(
289+
task_type="image-classification",
290+
dataset_type="beans",
291+
dataset_name="Beans",
292+
metric_type="acc",
293+
metric_value=0.9,
294+
),
295+
]
296+
data = ModelCardData(
297+
eval_results=eval_results,
298+
ignore_metadata_errors=True,
299+
)
300+
assert data.eval_results is not None and data.eval_results == eval_results
301+
259302

260303
class DatasetCardDataTest(unittest.TestCase):
261304
def test_train_eval_index_keys_updated(self):

0 commit comments

Comments
 (0)