Skip to content

Commit a3ce16a

Browse files
authored
Support Python 3.11 (#7)
* Pin transitive urllib3 dependency to unbreak requests==2.30 * Pin to requests-toolbelt>=1 instead of pinning transitive urllib3 * Add back importlib-metadata dependency * Allow Python 3.11 and add version to CI matrix * Pin urllib3 per requests issue recommendation, leave TODO * Pin transitive urllib3 dependency to unbreak requests==2.30 * Pin to requests-toolbelt>=1 instead of pinning transitive urllib3 * Add back importlib-metadata dependency * Tediously update all enum usages to use Enum.KEY.value instead of Enum.KEY, which works on Python<3.11 but broke in 3.11 (see: python/cpython#100458) * Move FR-specific upload_image_chips to kolena.fr._utils; remove kolena.fr._consts; rename some already-hidden classes to remove leading underscore (already in underscored files) * Revert {_=>}BatchedLoader diff, this PR is not the place for that * Start updating integration tests with new description edit semantics * Add missing pytest-depends dev dependency * Fix FR test__edit__reset * Assert description update without version update * Unbreak upload_image_chips after move * Unbreak FR test suite test__load by comparing data to data * Remove bad comparison
1 parent 295f79f commit a3ce16a

34 files changed

+268
-271
lines changed

.circleci/config.yml

+5-3
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ jobs:
3333
- /home/circleci/project/.poetry/virtualenvs
3434
- poetry.lock
3535
- run: poetry run python3 -c 'import kolena'
36+
# TODO: fix underlying mypy issues with Python>3.9 rather than skipping
3637
- when:
3738
condition:
3839
not:
39-
# TODO: upgrade mypy version to address # https://github.com/python/mypy/issues/13627
40-
equal: [ "3.10", << parameters.python-version >> ]
40+
or:
41+
- equal: [ "3.10", << parameters.python-version >> ]
42+
- equal: [ "3.11", << parameters.python-version >> ]
4143
steps:
4244
- run: poetry run pre-commit run -a
4345
- run:
@@ -108,7 +110,7 @@ workflows:
108110
name: unit-test-<< matrix.python-version >>
109111
matrix:
110112
parameters:
111-
python-version: [ "3.7", "3.8", "3.9", "3.10" ]
113+
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
112114
- integration-test:
113115
matrix:
114116
parameters:

kolena/_api/v1/batched_load.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ class Path(str, Enum):
2525

2626
@classmethod
2727
def upload_signed_url(cls, load_uuid: str) -> str:
28-
return f"{cls.UPLOAD_SIGNED_URL_STUB}/{load_uuid}"
28+
return f"{cls.UPLOAD_SIGNED_URL_STUB.value}/{load_uuid}"
2929

3030
@classmethod
3131
def download_by_path(cls, path: str) -> str:
32-
return f"{cls.DOWNLOAD_BY_PATH_STUB}/{path}"
32+
return f"{cls.DOWNLOAD_BY_PATH_STUB.value}/{path}"
3333

3434
@dataclass(frozen=True)
3535
class WithLoadUUID:

kolena/_api/v1/samples.py

-19
This file was deleted.

kolena/_utils/asset_path_mapper.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def absolute_locator(self, test_run_id: int, load_uuid: str, image_id: int, key:
3030
def relative_locator(self, path_stub: str) -> str:
3131
return f"{self.prefix}/{path_stub}"
3232

33-
def path_stub(self, test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
33+
@staticmethod
34+
def path_stub(test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
3435
return f"{test_run_id}/{image_id}/{key}-{load_uuid}.png"
3536

3637
def _absolute_locator(self, relative_locator: str) -> str:

kolena/_utils/batched_load.py

+2-48
Original file line numberDiff line numberDiff line change
@@ -26,26 +26,20 @@
2626
import numpy as np
2727
import pandas as pd
2828
import requests
29-
from PIL import Image
30-
from requests_toolbelt import MultipartEncoder
3129
from retrying import retry
3230

3331
from kolena._api.v1.batched_load import BatchedLoad as API
34-
from kolena._api.v1.fr import Asset as AssetAPI
3532
from kolena._utils import krequests
3633
from kolena._utils import log
37-
from kolena._utils.asset_path_mapper import AssetPathMapper
3834
from kolena._utils.datatypes import LoadableDataFrame
3935
from kolena._utils.serde import from_dict
40-
from kolena.fr._consts import _BatchSize
41-
from kolena.fr.datatypes import _ImageChipsDataFrame
4236

4337
VALIDATION_COUNT_LIMIT = 100
4438
STAGE_STATUS__LOADED = "LOADED"
4539

4640

4741
def init_upload() -> API.InitiateUploadResponse:
48-
init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD)
42+
init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD.value)
4943
krequests.raise_for_status(init_res)
5044
init_response = from_dict(data_class=API.InitiateUploadResponse, data=init_res.json())
5145
return init_response
@@ -78,46 +72,6 @@ def upload_data_frame_chunk(df_chunk: pd.DataFrame, load_uuid: str) -> None:
7872
krequests.raise_for_status(upload_response)
7973

8074

81-
def upload_image_chips(
82-
df: _ImageChipsDataFrame,
83-
path_mapper: AssetPathMapper,
84-
batch_size: int = _BatchSize.UPLOAD_CHIPS,
85-
) -> None:
86-
def upload_batch(df_batch: _ImageChipsDataFrame) -> None:
87-
df_batch = df_batch.reset_index(drop=True) # reset indices so we match the signed_url indices
88-
89-
def as_buffer(image_raw: np.ndarray) -> io.BytesIO:
90-
pil_image = Image.fromarray(image_raw).convert("RGB")
91-
buf = io.BytesIO()
92-
pil_image.save(buf, "png")
93-
buf.seek(0)
94-
return buf
95-
96-
data = MultipartEncoder(
97-
fields=[
98-
(
99-
"files",
100-
(
101-
path_mapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]),
102-
as_buffer(row["image"]),
103-
),
104-
)
105-
for _, row in df_batch.iterrows()
106-
],
107-
)
108-
upload_response = krequests.put(
109-
endpoint_path=AssetAPI.Path.BULK_UPLOAD,
110-
data=data,
111-
headers={"Content-Type": data.content_type},
112-
)
113-
krequests.raise_for_status(upload_response)
114-
115-
num_chunks = math.ceil(len(df) / batch_size)
116-
chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else []
117-
for df_chunk in chunk_iter:
118-
upload_batch(df_chunk)
119-
120-
12175
DFType = TypeVar("DFType", bound=LoadableDataFrame)
12276

12377

@@ -157,7 +111,7 @@ def complete_load(uuid: Optional[str]) -> None:
157111
return
158112
complete_request = API.CompleteDownloadRequest(uuid=uuid)
159113
complete_res = krequests.put(
160-
endpoint_path=API.Path.COMPLETE_DOWNLOAD,
114+
endpoint_path=API.Path.COMPLETE_DOWNLOAD.value,
161115
data=json.dumps(dataclasses.asdict(complete_request)),
162116
)
163117
krequests.raise_for_status(complete_res)

kolena/_utils/_consts.py renamed to kolena/_utils/consts.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from enum import Enum
1515

1616

17-
class _BatchSize(int, Enum):
18-
UPLOAD_CHIPS = 5_000
17+
class BatchSize(int, Enum):
18+
UPLOAD_CHIPS = 1_000
1919
UPLOAD_RECORDS = 10_000_000
2020
UPLOAD_RESULTS = 1_000_000
2121

kolena/_utils/instrumentation.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,15 @@ def upload_log(message: str, status: str) -> None:
5454
message=message,
5555
status=status,
5656
)
57-
krequests.post(endpoint_path=API.Path.UPLOAD, json=dataclasses.asdict(request))
57+
krequests.post(endpoint_path=API.Path.UPLOAD.value, json=dataclasses.asdict(request))
5858

5959

6060
def log_telemetry(e: BaseException) -> None:
6161
try:
6262
stack = tb.format_stack()
6363
exc_format = tb.format_exception(None, e, e.__traceback__)
6464
combined = stack + exc_format
65-
upload_log("".join(combined), DatadogLogLevels.ERROR)
65+
upload_log("".join(combined), DatadogLogLevels.ERROR.value)
6666
except BaseException:
6767
"""
6868
Attempting to upload the telemetry is best-effort. We don't want to have exceptions in that

kolena/_utils/repository.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
def create(repository: str) -> None:
2323
response = krequests.post(
24-
endpoint_path=Path.CREATE,
24+
endpoint_path=Path.CREATE.value,
2525
data=json.dumps(dataclasses.asdict(CreateRepositoryRequest(repository=repository))),
2626
)
2727
krequests.raise_for_status(response)

kolena/detection/_internal/model.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@
3333
from kolena._api.v1.workflow import WorkflowType
3434
from kolena._utils import krequests
3535
from kolena._utils import log
36-
from kolena._utils._consts import _BatchSize
3736
from kolena._utils.batched_load import _BatchedLoader
3837
from kolena._utils.batched_load import DFType
38+
from kolena._utils.consts import BatchSize
3939
from kolena._utils.frozen import Frozen
4040
from kolena._utils.instrumentation import WithTelemetry
4141
from kolena._utils.serde import from_dict
@@ -93,7 +93,7 @@ def __init__(self, name: str, workflow: WorkflowType, metadata: Optional[Dict[st
9393
def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> CoreAPI.EntityData:
9494
log.info(f"creating new model '{name}'")
9595
request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=workflow.value)
96-
res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
96+
res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
9797
krequests.raise_for_status(res)
9898
log.success(f"created new model '{name}'")
9999
return from_dict(data_class=CoreAPI.EntityData, data=res.json())
@@ -102,7 +102,7 @@ def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) ->
102102
@validate_arguments(config=ValidatorConfig)
103103
def _load_by_name(cls, name: str) -> CoreAPI.EntityData:
104104
request = CoreAPI.LoadByNameRequest(name=name)
105-
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
105+
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
106106
krequests.raise_for_status(res)
107107
return from_dict(data_class=CoreAPI.EntityData, data=res.json())
108108

@@ -131,7 +131,7 @@ def iter_inferences(
131131
def _iter_inference_batch_for_reference(
132132
self,
133133
test_object: Union[_TestCaseClass, _TestSuiteClass],
134-
batch_size: int = _BatchSize.LOAD_SAMPLES,
134+
batch_size: int = BatchSize.LOAD_SAMPLES.value,
135135
) -> Iterator[_LoadInferencesDataFrameClass]:
136136
if batch_size <= 0:
137137
raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -143,7 +143,7 @@ def _iter_inference_batch_for_reference(
143143
init_request = API.InitLoadInferencesRequest(**params)
144144
yield from _BatchedLoader.iter_data(
145145
init_request=init_request,
146-
endpoint_path=API.Path.INIT_LOAD_INFERENCES,
146+
endpoint_path=API.Path.INIT_LOAD_INFERENCES.value,
147147
df_class=self._LoadInferencesDataFrameClass,
148148
)
149149
log.success(f"loaded inferences from model '{self.name}' on {test_object_display_name}")
@@ -166,7 +166,7 @@ def load_inferences_by_test_case(
166166
def _iter_inference_batch_for_test_suite(
167167
self,
168168
test_suite: _TestSuiteClass,
169-
batch_size: int = _BatchSize.LOAD_SAMPLES,
169+
batch_size: int = BatchSize.LOAD_SAMPLES.value,
170170
) -> Iterator[_LoadInferencesDataFrameClass]:
171171
if batch_size <= 0:
172172
raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -175,7 +175,7 @@ def _iter_inference_batch_for_test_suite(
175175
init_request = API.InitLoadInferencesByTestCaseRequest(**params)
176176
yield from _BatchedLoader.iter_data(
177177
init_request=init_request,
178-
endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE,
178+
endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE.value,
179179
df_class=self._LoadInferencesDataFrameClass,
180180
)
181181
log.success(f"loaded inferences from model '{self.name}' on test suite '{test_suite.name}'")

kolena/detection/_internal/test_case.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@
3030
from kolena._api.v1.workflow import WorkflowType
3131
from kolena._utils import krequests
3232
from kolena._utils import log
33-
from kolena._utils._consts import _BatchSize
3433
from kolena._utils.batched_load import _BatchedLoader
3534
from kolena._utils.batched_load import DFType
3635
from kolena._utils.batched_load import init_upload
3736
from kolena._utils.batched_load import upload_data_frame
37+
from kolena._utils.consts import BatchSize
3838
from kolena._utils.dataframes.validators import validate_df_schema
3939
from kolena._utils.frozen import Frozen
4040
from kolena._utils.instrumentation import WithTelemetry
@@ -128,7 +128,7 @@ def _create(
128128
"""Create a new test case with the provided name."""
129129
log.info(f"creating new test case '{name}'")
130130
request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=workflow.value)
131-
res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
131+
res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
132132
krequests.raise_for_status(res)
133133
data = from_dict(data_class=CoreAPI.EntityData, data=res.json())
134134
obj = cls._create_from_data(data)
@@ -142,7 +142,7 @@ def _create(
142142
def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.EntityData:
143143
"""Load an existing test case with the provided name."""
144144
request = CoreAPI.LoadByNameRequest(name=name, version=version)
145-
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
145+
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
146146
krequests.raise_for_status(res)
147147
return from_dict(data_class=CoreAPI.EntityData, data=res.json())
148148

@@ -173,10 +173,10 @@ def load_images(self) -> List[_TestImageClass]:
173173
def iter_images(self) -> Iterator[_TestImageClass]:
174174
"""Iterate through all images with their associated ground truths in this test case."""
175175
log.info(f"loading test images for test case '{self.name}'")
176-
init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id)
176+
init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id)
177177
for df in _BatchedLoader.iter_data(
178178
init_request=init_request,
179-
endpoint_path=API.Path.INIT_LOAD_IMAGES,
179+
endpoint_path=API.Path.INIT_LOAD_IMAGES.value,
180180
df_class=self._TestImageDataFrameClass,
181181
):
182182
for record in df.itertuples():
@@ -312,7 +312,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
312312
init_response = init_upload()
313313
df = self._to_data_frame(list(editor._images.values()))
314314
df_serialized = df.as_serializable()
315-
upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid)
315+
upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid)
316316

317317
request = CoreAPI.CompleteEditRequest(
318318
test_case_id=self._id,
@@ -322,7 +322,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
322322
uuid=init_response.uuid,
323323
)
324324
complete_res = krequests.put(
325-
endpoint_path=API.Path.COMPLETE_EDIT,
325+
endpoint_path=API.Path.COMPLETE_EDIT.value,
326326
data=json.dumps(dataclasses.asdict(request)),
327327
)
328328
krequests.raise_for_status(complete_res)

0 commit comments

Comments
 (0)