|
3 | 3 | import argparse
|
4 | 4 | import dataclasses
|
5 | 5 | import json
|
| 6 | +import re |
6 | 7 | import threading
|
7 | 8 | from dataclasses import dataclass
|
8 | 9 | from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
|
@@ -368,10 +369,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
368 | 369 | 'data type. CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. '
|
369 | 370 | 'ROCm (AMD GPU) supports fp8 (=fp8_e4m3)')
|
370 | 371 | parser.add_argument('--max-model-len',
|
371 |
| - type=int, |
| 372 | + type=human_readable_int, |
372 | 373 | default=EngineArgs.max_model_len,
|
373 | 374 | help='Model context length. If unspecified, will '
|
374 |
| - 'be automatically derived from the model config.') |
| 375 | + 'be automatically derived from the model config. ' |
| 376 | + 'Supports k/m/g/K/M/G in human-readable format.\n' |
| 377 | + 'Examples:\n' |
| 378 | + '- 1k → 1000\n' |
| 379 | + '- 1K → 1024\n') |
375 | 380 | parser.add_argument(
|
376 | 381 | '--guided-decoding-backend',
|
377 | 382 | type=str,
|
@@ -1740,6 +1745,47 @@ def _warn_or_fallback(feature_name: str) -> bool:
|
1740 | 1745 | return should_exit
|
1741 | 1746 |
|
1742 | 1747 |
|
def human_readable_int(value):
    """Parse human-readable integers like '1k', '2M', etc.

    Lowercase suffixes (k/m/g/t) are decimal multipliers (powers of 1000)
    and may be combined with a decimal number. Uppercase suffixes (K/M/G/T)
    are binary multipliers (powers of 1024) and only accept whole numbers.
    A string without a suffix is parsed as a plain integer.

    Examples:
    - '1k' -> 1,000
    - '1K' -> 1,024
    - '25.6k' -> 25,600
    - '1.001k' -> 1,001

    Args:
        value: The string to parse. Surrounding whitespace is ignored.

    Returns:
        The parsed integer. If a decimal value does not scale to a whole
        number (e.g. '1.0005k'), the result is truncated towards zero.

    Raises:
        argparse.ArgumentTypeError: If a decimal number is combined with a
            binary (uppercase) suffix.
        ValueError: If ``value`` is neither a plain integer nor a number
            followed by a supported suffix.
    """
    value = value.strip()
    # Capture the whole and fractional digits separately so the scaling can
    # be done with exact integer arithmetic instead of floats.
    match = re.fullmatch(r'(\d+)(?:\.(\d+))?([kKmMgGtT])', value)
    if match is None:
        # Regular plain number.
        return int(value)

    whole, fraction, suffix = match.groups()

    decimal_multiplier = {
        'k': 10**3,
        'm': 10**6,
        'g': 10**9,
        't': 10**12,
    }
    binary_multiplier = {
        'K': 2**10,
        'M': 2**20,
        'G': 2**30,
        'T': 2**40,
    }

    if suffix in binary_multiplier:
        # Do not allow decimals with binary multipliers
        if fraction is not None:
            number = f"{whole}.{fraction}"
            raise argparse.ArgumentTypeError("Decimals are not allowed " \
                f"with binary suffixes like {suffix}. Did you mean to use " \
                f"{number}{suffix.lower()} instead?")
        return int(whole) * binary_multiplier[suffix]

    mult = decimal_multiplier[suffix]
    result = int(whole) * mult
    if fraction is not None:
        # Exact scaling of the fractional digits: going through float()
        # can land just below the true value (1.001 * 1000 evaluates to
        # 1000.9999999999999), which int() would truncate to 1000.
        result += int(fraction) * mult // 10**len(fraction)
    return result
| 1787 | + |
| 1788 | + |
1743 | 1789 | # These functions are used by sphinx to build the documentation
|
def _engine_args_parser():
    """Build a fresh parser and register the ``EngineArgs`` CLI options on it.

    Returns whatever ``EngineArgs.add_cli_args`` returns for a newly created
    ``FlexibleArgumentParser``.
    """
    parser = FlexibleArgumentParser()
    return EngineArgs.add_cli_args(parser)
|
|
0 commit comments