Commit 0ec9568: Bump Version 0.6.0

1 parent: b84da94

7 files changed: +109 -107 lines changed

Diff for: CHANGELOG.md (+3 -3)

@@ -1,13 +1,13 @@
 # superduper.io Changelog
 
-All notable changes to this project will be documented in this file.
+All notable changes to this project will be documented in this file.
 
 The format is inspired by (but not strictly follows) [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 **Before you create a Pull Request, remember to update the Changelog with your changes.**
 
-## Changes Since Last Release
+## [0.6.0](https://github.com/superduper-io/superduper/compare/0.6.0...0.5.0]) (2025-Mar-26)
 
 #### Changed defaults / behaviours
 
@@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - No need to define `_fields`
 - Use databackend to perform metadata duties
 - Add `db.create` and `db.insert` instead of `auto_schema`
-- Merkel-tree implementation replacing random `.uuid` with deterministic implementation
+- Merkle-tree implementation replacing random `.uuid` with deterministic implementation
 - Simplify the `Template` class
 - Simplify `Component` lifecycle by removing `Component.pre_create`
 - Renamed `Component.init` to `Component.setup`
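Note: the "Merkle-tree implementation" entry above replaces random `.uuid` values with identifiers derived from a component's content. A minimal sketch of the general idea, with illustrative names only (not superduper's actual implementation):

    import hashlib
    import json

    def deterministic_uuid(params: dict, children: list) -> str:
        # Hash the component's own parameters together with the uuids of its
        # children, the way a Merkle tree's internal node hashes its leaves;
        # identical definitions then always map to the same uuid.
        payload = json.dumps(
            {'params': params, 'children': sorted(children)}, sort_keys=True
        )
        return hashlib.sha256(payload.encode()).hexdigest()[:32]

    leaf = deterministic_uuid({'identifier': 'chunker'}, [])
    root = deterministic_uuid({'identifier': 'app'}, [leaf])
    # Deterministic: re-computing from the same definition gives the same uuid.
    assert root == deterministic_uuid({'identifier': 'app'}, [leaf])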

Diff for: plugins/mongodb/superduper_mongodb/data_backend.py (-1)

@@ -1,5 +1,4 @@
 import json
-import os
 import typing as t
 
 import click

Diff for: pyproject.toml (+1 -1)

@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "superduper-framework"
 description = "Build compositional and declarative AI applications and agents"
 readme = "README.md"
-version = '0.5.0'
+version = '0.6.0'
 license = {file = "LICENSE"}
 maintainers = [{name = "superduper.io, Inc.", email = "[email protected]"}]
 keywords = [

Diff for: superduper/base/config_settings.py (+7 -4)

@@ -66,11 +66,14 @@ def config(self) -> t.Any:
         try:
             with open(USER_CONFIG) as f:
                 kwargs = yaml.safe_load(f)
-        except FileNotFoundError as e:
+        except FileNotFoundError:
             if USER_CONFIG != f'{HOME}/.superduper/config.yaml':
-                raise ConfigError(
-                    f'Could not find config file: {USER_CONFIG}'
-                ) from e
+                from warnings import warn
+
+                warn(
+                    f'Could not find config file: {USER_CONFIG}, '
+                    'falling back to defaults...'
+                )
         if self.base:
             kwargs = kwargs.get(self.base, {})
 
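Note: with this change, a missing config file at a non-default location degrades gracefully: the loader warns and falls back to default settings instead of raising `ConfigError`. Roughly, assuming the usual `SUPERDUPER_CONFIG` environment variable is what populates `USER_CONFIG` (the path below is hypothetical):

    import os

    # Point the loader at a config file that does not exist.
    os.environ['SUPERDUPER_CONFIG'] = '/tmp/no-such-config.yaml'

    # Loading settings now emits approximately:
    #   UserWarning: Could not find config file: /tmp/no-such-config.yaml,
    #   falling back to defaults...
    # whereas 0.5.0 raised ConfigError at this point.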

Diff for: superduper/base/query.py (+4 -1)

@@ -94,6 +94,9 @@ def predict_ids(self):
 
     def to_query(self):
         """Convert decomposition back to a ``Query``."""
+        if self.db is None:
+            self.db = db
+
         q = self.db[self.table]
 
         if self.pre_like:
@@ -360,7 +363,7 @@ def outputs(self, *predict_ids):
 
         :param predict_ids: The predict_ids to add. # noqa
         """
-        d = self.decomposition
+        d: Decomposition = self.decomposition
 
         assert not d.outputs
 
Diff for: templates/multimodal_video_search/build.ipynb (+42 -38)

@@ -185,56 +185,60 @@
 "import cv2\n",
 "import tqdm\n",
 "from PIL import Image\n",
-"from superduper import Schema, ObjectModel\n",
+"from superduper import ObjectModel\n",
 "from superduper.base.datatype import FileItem\n",
 "from superduper.misc.importing import isreallyinstance\n",
 "\n",
 "\n",
-"def chunker(video_file):\n",
-"    # Set the sampling frequency for frames\n",
-"\n",
-"    if isreallyinstance(video_file, FileItem):\n",
-"        video_file = video_file.unpack()\n",
-"    sample_freq = 100\n",
-"    \n",
-"    # Open the video file using OpenCV\n",
-"    cap = cv2.VideoCapture(video_file)\n",
-"    \n",
-"    # Initialize variables\n",
-"    frame_count = 0\n",
-"    fps = cap.get(cv2.CAP_PROP_FPS)\n",
-"    extracted_frames = []\n",
-"    progress = tqdm.tqdm()\n",
-"\n",
-"    # Iterate through video frames\n",
-"    while True:\n",
-"        ret, frame = cap.read()\n",
-"        if not ret:\n",
-"            break\n",
-"        \n",
-"        # Get the current timestamp based on frame count and FPS\n",
-"        current_timestamp = frame_count // fps\n",
-"        \n",
-"        # Sample frames based on the specified frequency\n",
-"        if frame_count % sample_freq == 0:\n",
-"            extracted_frames.append({\n",
-"                'image': Image.fromarray(frame[:,:,::-1]), # Convert BGR to RGB\n",
-"                'current_timestamp': current_timestamp,\n",
-"            })\n",
-"        frame_count += 1\n",
-"        progress.update(1)\n",
-"    \n",
-"    # Release resources \n",
-"    cap.release()\n",
-"    cv2.destroyAllWindows()\n",
-"    \n",
-"    # Return the list of extracted frames\n",
-"    return extracted_frames\n",
+"class Chunker:\n",
+"    def __hash__(self):\n",
+"        return 1234567890\n",
+"\n",
+"    def __call__(self, video_file):\n",
+"        # Set the sampling frequency for frames\n",
+"\n",
+"        if isreallyinstance(video_file, FileItem):\n",
+"            video_file = video_file.unpack()\n",
+"        sample_freq = 100\n",
+"        \n",
+"        # Open the video file using OpenCV\n",
+"        cap = cv2.VideoCapture(video_file)\n",
+"        \n",
+"        # Initialize variables\n",
+"        frame_count = 0\n",
+"        fps = cap.get(cv2.CAP_PROP_FPS)\n",
+"        extracted_frames = []\n",
+"        progress = tqdm.tqdm()\n",
+"\n",
+"        # Iterate through video frames\n",
+"        while True:\n",
+"            ret, frame = cap.read()\n",
+"            if not ret:\n",
+"                break\n",
+"            \n",
+"            # Get the current timestamp based on frame count and FPS\n",
+"            current_timestamp = frame_count // fps\n",
+"            \n",
+"            # Sample frames based on the specified frequency\n",
+"            if frame_count % sample_freq == 0:\n",
+"                extracted_frames.append({\n",
+"                    'image': Image.fromarray(frame[:,:,::-1]), # Convert BGR to RGB\n",
+"                    'current_timestamp': current_timestamp,\n",
+"                })\n",
+"            frame_count += 1\n",
+"            progress.update(1)\n",
+"        \n",
+"        # Release resources \n",
+"        cap.release()\n",
+"        cv2.destroyAllWindows()\n",
+"        \n",
+"        # Return the list of extracted frames\n",
+"        return extracted_frames\n",
 "\n",
 "\n",
 "chunker = ObjectModel(\n",
 "    'chunker', \n",
-"    object=chunker,\n",
+"    object=Chunker(),\n",
 "    datatype='image=superduper_pillow.pil_image|current_timestamp=int',\n",
 ")"
 ]
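Note: this and the pdf_rag notebook below both wrap plain functions in callable classes with a constant `__hash__`. A plausible motivation, consistent with the deterministic-uuid change in the changelog (an inference, not stated in the diff): a function's default hash is derived from its memory address and changes every interpreter session, so any identifier computed from it drifts, whereas a fixed `__hash__` keeps the wrapped callable's identity stable. Minimal illustration:

    def chunker(video_file):
        ...

    class Chunker:
        def __hash__(self):
            return 1234567890  # constant across interpreter sessions

        def __call__(self, video_file):
            ...

    # hash(chunker) is based on id(chunker) and varies run to run;
    # hash(Chunker()) is always 1234567890.
    assert hash(Chunker()) == 1234567890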

Diff for: templates/pdf_rag/build.ipynb (+52 -59)

@@ -148,32 +148,36 @@
 "import os\n",
 "\n",
 "\n",
-"def split_image(pdf_path):\n",
-"    if hasattr(pdf_path, 'unpack'):\n",
-"        pdf_path = pdf_path.unpack()\n",
-"    \n",
-"    logging.info(f\"Splitting images from {pdf_path}\")\n",
-"\n",
-"    image_folders = \"data/pdf-images\"\n",
-"    pdf_name = os.path.basename(pdf_path)\n",
-"    images = convert_from_path(pdf_path)\n",
-"    logging.info(f\"Number of images: {len(images)}\")\n",
-"\n",
-"    image_folder = os.path.join(image_folders, pdf_name)\n",
-"    if not os.path.exists(image_folder):\n",
-"        os.makedirs(image_folder)\n",
-"\n",
-"    data = []\n",
-"    for i, image in enumerate(images):\n",
-"        path = os.path.join(image_folder, f\"{i}.jpg\")\n",
-"        image.save(os.path.join(path))\n",
-"        data.append(path)\n",
-"    return data\n",
+"class SplitImage:\n",
+"    def __hash__(self):\n",
+"        return 1234567890\n",
+"\n",
+"    def __call__(self, pdf_path):\n",
+"        if hasattr(pdf_path, 'unpack'):\n",
+"            pdf_path = pdf_path.unpack()\n",
+"        \n",
+"        logging.info(f\"Splitting images from {pdf_path}\")\n",
+"\n",
+"        image_folders = \"data/pdf-images\"\n",
+"        pdf_name = os.path.basename(pdf_path)\n",
+"        images = convert_from_path(pdf_path)\n",
+"        logging.info(f\"Number of images: {len(images)}\")\n",
+"\n",
+"        image_folder = os.path.join(image_folders, pdf_name)\n",
+"        if not os.path.exists(image_folder):\n",
+"            os.makedirs(image_folder)\n",
+"\n",
+"        data = []\n",
+"        for i, image in enumerate(images):\n",
+"            path = os.path.join(image_folder, f\"{i}.jpg\")\n",
+"            image.save(os.path.join(path))\n",
+"            data.append(path)\n",
+"        return data\n",
 "\n",
 "\n",
 "model_split_image = ObjectModel(\n",
 "    identifier=\"split_image\",\n",
-"    object=split_image,\n",
+"    object=SplitImage(),\n",
 "    datatype='file',\n",
 ")\n",
 "\n",
@@ -287,32 +291,36 @@
 "    return datas\n",
 "\n",
 "\n",
-"def get_chunks(pdf):\n",
-"    from collections import defaultdict\n",
-"    from unstructured.documents.coordinates import RelativeCoordinateSystem\n",
-"    from unstructured.partition.pdf import partition_pdf\n",
-"\n",
-"    if hasattr(pdf, 'unpack'):\n",
-"        pdf = pdf.unpack()\n",
-"\n",
-"    elements = partition_pdf(pdf)\n",
-"    elements = remove_annotation(elements)\n",
-"\n",
-"    pages_elements = defaultdict(list)\n",
-"    for element in elements:\n",
-"        element.convert_coordinates_to_new_system(\n",
-"            RelativeCoordinateSystem(), in_place=True\n",
-"        )\n",
-"        pages_elements[element.metadata.page_number].append(element)\n",
-"\n",
-"    all_chunks_and_links = sum(\n",
-"        [\n",
-"            create_chunk_and_metadatas(page_elements)\n",
-"            for _, page_elements in pages_elements.items()\n",
-"        ],\n",
-"        [],\n",
-"    )\n",
-"    return all_chunks_and_links"
+"class GetChunks:\n",
+"    def __hash__(self):\n",
+"        return 24681012\n",
+"\n",
+"    def __call__(self, pdf):\n",
+"        from collections import defaultdict\n",
+"        from unstructured.documents.coordinates import RelativeCoordinateSystem\n",
+"        from unstructured.partition.pdf import partition_pdf\n",
+"\n",
+"        if hasattr(pdf, 'unpack'):\n",
+"            pdf = pdf.unpack()\n",
+"\n",
+"        elements = partition_pdf(pdf)\n",
+"        elements = remove_annotation(elements)\n",
+"\n",
+"        pages_elements = defaultdict(list)\n",
+"        for element in elements:\n",
+"            element.convert_coordinates_to_new_system(\n",
+"                RelativeCoordinateSystem(), in_place=True\n",
+"            )\n",
+"            pages_elements[element.metadata.page_number].append(element)\n",
+"\n",
+"        all_chunks_and_links = sum(\n",
+"            [\n",
+"                create_chunk_and_metadatas(page_elements)\n",
+"                for _, page_elements in pages_elements.items()\n",
+"            ],\n",
+"            [],\n",
+"        )\n",
+"        return all_chunks_and_links"
 ]
 },
 {
@@ -324,7 +332,7 @@
 "source": [
 "model_chunk = ObjectModel(\n",
 "    identifier=\"chunk\",\n",
-"    object=get_chunks,\n",
+"    object=GetChunks(),\n",
 "    datatype='json',\n",
 ")\n",
 "\n",
@@ -367,7 +375,6 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from superduper_openai.model import OpenAIEmbedding\n",
 "from superduper import VectorIndex\n",
 "\n",
 "listener_embedding = Listener(\n",
@@ -413,7 +420,6 @@
 "    identifier=\"processor\",\n",
 "    chunk_key=listener_chunk.outputs,\n",
 "    split_image_key=listener_split_image.outputs,\n",
-"    upstream=[Plugin(path=\"./utils.py\")],\n",
 ")"
 ]
 },
@@ -446,7 +452,6 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from superduper_openai.model import OpenAIChatCompletion\n",
 "from utils import Rag\n",
 "\n",
 "prompt_template = (\n",
@@ -468,18 +473,6 @@
 ")"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "09e04c4c-c932-4358-ae2f-61cc482f0ff4",
-"metadata": {},
-"outputs": [],
-"source": [
-"from utils import Rag\n",
-"\n",
-"Rag.__module__"
-]
-},
 {
 "cell_type": "markdown",
 "id": "fde11162-e994-4621-af36-b5fa9bc3f258",
@@ -505,8 +498,8 @@
 "    listener_chunk,\n",
 "    vector_index,\n",
 "    rag\n",
-"    ]\n",
-"    \n",
+"    ],\n",
+"    upstream=[Plugin(path=\"./utils.py\")],\n",
 ")"
 ]
 },

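Note: besides the `SplitImage`/`GetChunks` conversions, this notebook drops the unused `OpenAIEmbedding` and `OpenAIChatCompletion` imports, removes a leftover `Rag.__module__` inspection cell, and moves the `upstream=[Plugin(path="./utils.py")]` dependency from the processor component to the application assembled in the final cell, presumably so the plugin is available to every component rather than to the processor alone.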