Skip to content

Commit 4fc1bf8

Browse files
Crucifixion-Fxl and Crucifixion-Fxl authored
[Bugfix] Migrate to REGEX Library to prevent catastrophic backtracking (#18454)
Signed-off-by: Crucifixion-Fxl <[email protected]>
Co-authored-by: Crucifixion-Fxl <[email protected]>
1 parent f203673 commit 4fc1bf8

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

52 files changed: +62 −58 lines changed

.github/scripts/cleanup_pr_body.sh

100755 → 100644
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ sed -i '/\*\*BEFORE SUBMITTING, PLEASE READ.*\*\*/,$d' "${NEW}"
2626

2727
# Remove HTML <details> section that includes <summary> text of "PR Checklist (Click to Expand)"
2828
python3 - <<EOF
29-
import re
29+
import regex as re
3030
3131
with open("${NEW}", "r") as file:
3232
content = file.read()

benchmarks/benchmark_serving_structured_output.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def process_one_metric(
672672
def evaluate(ret, args):
673673
def _eval_correctness_json(expected, actual):
674674
# extract json string from string using regex
675-
import re
675+
import regex as re
676676

677677
actual = actual.replace("\n", "").replace(" ", "").strip()
678678
try:
@@ -687,7 +687,7 @@ def _eval_correctness_choice(expected, actual):
687687
return actual in args.choice
688688

689689
def _eval_correctness_regex(expected, actual):
690-
import re
690+
import regex as re
691691

692692
return re.match(args.regex, actual) is not None
693693

benchmarks/kernels/graph_machete_bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
import math
44
import pickle
5-
import re
65
from collections import defaultdict
76

87
import matplotlib.pyplot as plt
98
import pandas as pd
9+
import regex as re
1010
import seaborn as sns
1111
from torch.utils.benchmark import Measurement as TMeasurement
1212

examples/offline_inference/prithvi_geospatial_mae.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020
import argparse
2121
import datetime
2222
import os
23-
import re
2423
from typing import Union
2524

2625
import albumentations
2726
import numpy as np
2827
import rasterio
28+
import regex as re
2929
import torch
3030
from einops import rearrange
3131
from terratorch.datamodules import Sen1Floods11NonGeoDataModule

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ requires = [
88
"setuptools-scm>=8.0",
99
"torch == 2.7.0",
1010
"wheel",
11+
"regex",
1112
"jinja2",
1213
]
1314
build-backend = "setuptools.build_meta"

requirements/build.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ setuptools-scm>=8
77
torch==2.7.0
88
wheel
99
jinja2>=3.1.6
10+
regex

requirements/common.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
regex # Replace re for higher-performance regex matching
12
cachetools
23
psutil
34
sentencepiece # Required for LLaMA tokenizer.

requirements/nightly_torch_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ matplotlib # required for qwen-vl test
3838
# required for Multi-Modal Models Test (Standard)
3939
num2words # required for smolvlm test
4040
pqdm
41-
timm # required for internvl test
41+
timm # required for internvl test

setup.py

100755 → 100644
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
import json
66
import logging
77
import os
8-
import re
98
import subprocess
109
import sys
1110
from pathlib import Path
1211
from shutil import which
1312

13+
import regex as re
1414
import torch
1515
from packaging.version import Version, parse
1616
from setuptools import Extension, setup
@@ -389,7 +389,6 @@ def run(self) -> None:
389389
# vllm_flash_attn python code:
390390
# Regex from
391391
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
392-
import re
393392
compiled_regex = re.compile(
394393
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
395394
file_members += list(

tests/entrypoints/llm/test_guided_generate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import json
4-
import re
54
import weakref
65
from enum import Enum
76

87
import jsonschema
98
import pytest
9+
import regex as re
1010
from pydantic import BaseModel
1111

1212
from vllm.distributed import cleanup_dist_env_and_memory

tests/entrypoints/openai/test_chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
# imports for guided decoding tests
44
import json
5-
import re
65
from typing import Optional
76

87
import jsonschema
98
import openai # use the official client for correctness check
109
import pytest
1110
import pytest_asyncio
11+
import regex as re
1212
import requests
1313
import torch
1414
from openai import BadRequestError, OpenAI

tests/entrypoints/openai/test_completion.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
2-
32
# imports for guided decoding tests
43
import json
5-
import re
64
import shutil
75
from tempfile import TemporaryDirectory
86
from typing import Optional
@@ -11,6 +9,7 @@
119
import openai # use the official client for correctness check
1210
import pytest
1311
import pytest_asyncio
12+
import regex as re
1413
# downloading lora to test lora requests
1514
from huggingface_hub import snapshot_download
1615
from openai import BadRequestError

tests/entrypoints/openai/test_prompt_validation.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
# imports for guided decoding tests
4-
import re
5-
64
import openai
75
import pytest
6+
import regex as re
87

98
from ...utils import RemoteOpenAIServer
109

@@ -32,7 +31,7 @@ async def test_out_of_vocab_token_ids():
3231
client = remote_server.get_async_client()
3332

3433
with pytest.raises(openai.BadRequestError,
35-
match=re.compile('.*out of vocabulary.*')):
34+
match=re.compile('.*out of vocabulary.*').pattern):
3635
await client.completions.create(model=model_name,
3736
prompt=[999999],
3837
max_tokens=5,
@@ -46,9 +45,10 @@ async def test_reject_multistep_with_guided_decoding():
4645
with RemoteOpenAIServer(model_name, server_args) as remote_server:
4746
client = remote_server.get_async_client()
4847

49-
with pytest.raises(openai.BadRequestError,
50-
match=re.compile(
51-
'.*Guided decoding .* multi-step decoding.*')):
48+
with pytest.raises(
49+
openai.BadRequestError,
50+
match=re.compile(
51+
'.*Guided decoding .* multi-step decoding.*').pattern):
5252
await client.completions.create(
5353
model=model_name,
5454
prompt="Hello",

tests/models/multimodal/generation/test_phi4mm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import os
4-
import re
54
from collections.abc import Sequence
65
from typing import Optional
76

87
import librosa
98
import pytest
9+
import regex as re
1010
from huggingface_hub import snapshot_download
1111
from transformers import AutoTokenizer
1212

tests/models/multimodal/generation/vlm_utils/model_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
for manipulating the input / output of HF & vLLM test runners, which are
44
typically specific to a small subset of models.
55
"""
6-
import re
76
import types
87
from pathlib import PosixPath
98
from typing import Optional, Union
109

10+
import regex as re
1111
import torch
1212
from PIL.Image import Image
1313
from transformers import (AutoConfig, AutoTokenizer, BatchFeature,

tests/tool_use/test_tool_choice_required.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# SPDX-License-Identifier: Apache-2.0
22
import json
3-
import re
43
from copy import deepcopy
54
from unittest.mock import MagicMock
65

76
import pytest
7+
import regex as re
88
from pydantic import TypeAdapter
99

1010
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
@@ -333,4 +333,4 @@ def test_streaming_output_valid(output, empty_params, delta_len):
333333
combined_messages += message.tool_calls[0].function.arguments
334334
combined_messages += "}]"
335335
assert json.loads(combined_messages) == output
336-
assert json.dumps(json.loads(combined_messages)) == output_json
336+
assert json.dumps(json.loads(combined_messages)) == output_json

tests/v1/entrypoints/llm/test_struct_output_generate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
from __future__ import annotations
55

66
import json
7-
import re
87
from enum import Enum
98
from typing import TYPE_CHECKING, Any
109

1110
import jsonschema
1211
import pytest
12+
import regex as re
1313
from pydantic import BaseModel
1414

1515
from tests.reasoning.utils import run_reasoning_extraction

tests/v1/entrypoints/openai/test_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3-
import re
43
from typing import Optional
54

65
import openai # use the official client for correctness check
76
import pytest
87
import pytest_asyncio
8+
import regex as re
99
from openai import BadRequestError
1010

1111
from tests.utils import RemoteOpenAIServer

tests/v1/sample/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3-
import re
43
from enum import Enum
54
from typing import Optional
65

6+
import regex as re
7+
78
from vllm import CompletionOutput
89

910

vllm/collect_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,4 +815,4 @@ def main():
815815

816816

817817
if __name__ == '__main__':
818-
main()
818+
main()

vllm/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import hashlib
77
import inspect
88
import json
9-
import re
109
import textwrap
1110
import uuid
1211
import warnings
@@ -20,6 +19,7 @@
2019
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
2120
Protocol, TypeVar, Union, cast, get_args, get_origin)
2221

22+
import regex as re
2323
import torch
2424
from torch.distributed import ProcessGroup, ReduceOp
2525
from transformers import PretrainedConfig

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import argparse
55
import dataclasses
66
import json
7-
import re
87
import sys
98
import threading
109
import warnings
@@ -13,6 +12,7 @@
1312
from typing import (Annotated, Any, Callable, Dict, List, Literal, Optional,
1413
Type, TypeVar, Union, cast, get_args, get_origin)
1514

15+
import regex as re
1616
import torch
1717
from typing_extensions import TypeIs, deprecated
1818

vllm/entrypoints/openai/api_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import inspect
88
import multiprocessing
99
import os
10-
import re
1110
import signal
1211
import socket
1312
import tempfile
@@ -21,6 +20,7 @@
2120
from typing import Annotated, Optional, Union
2221

2322
import prometheus_client
23+
import regex as re
2424
import uvloop
2525
from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
2626
from fastapi.exceptions import RequestValidationError

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
# Adapted from
44
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
55
import json
6-
import re
76
import time
87
from http import HTTPStatus
98
from typing import Annotated, Any, ClassVar, Literal, Optional, Union
109

10+
import regex as re
1111
import torch
1212
from fastapi import HTTPException, UploadFile
1313
from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,

vllm/entrypoints/openai/serving_chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
import asyncio
44
import json
5-
import re
65
import time
76
from collections.abc import AsyncGenerator, AsyncIterator
87
from collections.abc import Sequence as GenericSequence
98
from typing import Callable, Final, Optional, Union
109

1110
import jinja2
1211
import partial_json_parser
12+
import regex as re
1313
from fastapi import Request
1414
from pydantic import TypeAdapter
1515

vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3-
import re
43
from collections.abc import Sequence
54
from typing import Union
65

6+
import regex as re
7+
78
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
89
DeltaFunctionCall, DeltaMessage,
910
DeltaToolCall,

vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import json
4-
import re
54
from collections.abc import Sequence
65
from json import JSONDecoder
76
from typing import Union
87

98
import partial_json_parser
9+
import regex as re
1010
from partial_json_parser.core.options import Allow
1111

1212
from vllm.entrypoints.chat_utils import random_tool_call_id

vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import json
4-
import re
54
from collections.abc import Sequence
65
from typing import Union
76

87
import partial_json_parser
8+
import regex as re
99
from partial_json_parser.core.options import Allow
1010

1111
from vllm.entrypoints.chat_utils import random_tool_call_id

0 commit comments

Comments
 (0)