Skip to content

feat: Scalar things #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jul 29, 2023
2 changes: 2 additions & 0 deletions cloudquery/sdk/scalar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from .scalar import Scalar, ScalarInvalidTypeError, NULL_VALUE
from .scalar_factory import ScalarFactory
from .vector import Vector

from .binary import Binary
from .bool import Bool
from .date32 import Date32
from .date64 import Date64
from .float import Float
from .int import Int
from .list import List
from .string import String
from .timestamp import Timestamp
from .uint import Uint
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, Binary):
self._valid = value.is_valid
self._value = value.value
return

if type(value) == bytes:
self._valid = True
self._value = value
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ def set(self, value: Any):
self._valid = False
return

if isinstance(value, Bool):
self._valid = value.is_valid
self._value = value.value
return

if type(value) == bool:
self._value = value
elif type(value) == str:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/date32.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ def set(self, value: Any):
self._valid = False
return

if isinstance(value, Date32):
self._valid = value.is_valid
self._value = value.value
return

if type(value) == datetime:
self._value = value
elif type(value) == str:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/date64.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ def set(self, value: Any):
self._valid = False
return

if isinstance(value, Date64):
self._valid = value.is_valid
self._value = value.value
return

if type(value) == datetime:
self._value = value
elif type(value) == str:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/float.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, Float) and value.bitwidth == self._bitwidth:
self._valid = value.is_valid
self._value = value.value
return

if type(value) == int:
self._value = float(value)
elif type(value) == float:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/int.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, Int) and value.bitwidth == self._bitwidth:
self._valid = value.is_valid
self._value = value.value
return

if type(value) == int:
self._value = value
elif type(value) == float:
Expand Down
60 changes: 60 additions & 0 deletions cloudquery/sdk/scalar/list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from cloudquery.sdk.scalar import Scalar, ScalarInvalidTypeError
from .scalar import NULL_VALUE
from .vector import Vector
from typing import Any, Type, Union


class List(Scalar):
    """Scalar whose value is a ``Vector`` of scalars of one element class.

    The element class is fixed at construction time; every item passed to
    ``set`` is converted by instantiating that class and calling its own
    ``set`` method.
    """

    def __init__(self, scalar_type: Type[Scalar]):
        """Create an empty list whose elements are ``scalar_type`` instances.

        Args:
            scalar_type: Scalar subclass used to build each element. It is
                instantiated without arguments in ``set``.
        """
        super().__init__(False, None)
        self._type = scalar_type
        self._value = Vector(scalar_type)

    def __eq__(self, other: Union[None, "List"]) -> bool:
        """Return True when ``other`` has the same class, validity and elements."""
        if other is None or type(self) != type(other):
            return False
        if self._valid != other._valid:
            return False
        return self._value == other._value

    @property
    def type(self):
        """Scalar class used for the elements of this list."""
        return self._type

    @property
    def value(self):
        """The ``Vector`` holding the current elements."""
        return self._value

    def set(self, val: Any):
        """Replace the contents of the list.

        Args:
            val: ``None`` (marks the list invalid and empties it), a scalar
                of exactly the element class (stored as a one-element list,
                or treated like ``None`` when that scalar is invalid), or a
                ``list``/``tuple`` of values accepted by the element class.

        Raises:
            ScalarInvalidTypeError: if ``val`` is none of the above.
        """
        if val is None:
            self._valid = False
            # Keep the element type on the replacement vector; an untyped
            # Vector() would silently accept whatever is appended first.
            self._value = Vector(self._type)
            return

        if isinstance(val, Scalar) and type(val) is self._type:
            if not val.is_valid:
                self._valid = False
                self._value = Vector(self._type)
                return
            self.set([val.value])
            return

        if isinstance(val, (list, tuple)):
            # Build into a local vector first so that a failing item does
            # not leave this list half-updated.
            items = Vector(self._type)
            for item in val:
                scalar = self._type()
                scalar.set(item)
                items.append(scalar)
            self._value = items
            self._valid = True
            return

        raise ScalarInvalidTypeError("Invalid type {} for List".format(type(val)))

    def __str__(self) -> str:
        """Return ``NULL_VALUE`` when invalid, else ``[a, b, ...]``."""
        if not self._valid:
            return NULL_VALUE
        return f"[{', '.join(str(v) for v in self._value)}]"

    def __len__(self):
        """Number of elements currently stored."""
        return len(self.value.data)
12 changes: 9 additions & 3 deletions cloudquery/sdk/scalar/scalar_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .date64 import Date64
from .float import Float
from .int import Int
from .list import List
from .string import String
from .timestamp import Timestamp
from .uint import Uint
Expand All @@ -16,7 +17,7 @@ class ScalarFactory:
def __init__(self):
pass

def new_scalar(self, dt):
def new_scalar(self, dt: pa.DataType):
dt_id = dt.id
if dt_id == pa.types.lib.Type_INT64:
return Int(bitwidth=64)
Expand Down Expand Up @@ -62,8 +63,13 @@ def new_scalar(self, dt):
return Float(bitwidth=16)
# elif dt_id == pa.types.lib.Type_INTERVAL_MONTH_DAY_NANO:
# return ()
# elif dt_id == pa.types.lib.Type_LIST or dt_id == pa.types.lib.Type_LARGE_LIST or dt_id == pa.types.lib.Type_FIXED_SIZE_LIST:
# return List()
elif (
dt_id == pa.types.lib.Type_LIST
or dt_id == pa.types.lib.Type_LARGE_LIST
or dt_id == pa.types.lib.Type_FIXED_SIZE_LIST
):
item = ScalarFactory.new_scalar(dt.field(0).type)
return List(type(item))
# elif dt_id == pa.types.lib.Type_MAP:
# return ()
elif (
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, String):
self._valid = value._valid
self._value = value.value
return

if type(value) == str:
self._valid = True
self._value = value
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ def set(self, value: Any):
self._valid = False
return

if isinstance(value, Timestamp):
self._valid = value.is_valid
self._value = value.value
return

if isinstance(value, pd.Timestamp):
self._value = value
elif type(value) == datetime:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/uint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, Uint) and value.bitwidth == self._bitwidth:
self._valid = value.is_valid
self._value = value.value
return

if type(value) == int:
val = value
elif type(value) == float:
Expand Down
5 changes: 5 additions & 0 deletions cloudquery/sdk/scalar/uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ def set(self, value: any):
self._valid = False
return

if isinstance(value, UUID):
self._valid = value.is_valid
self._value = value.value
return

if type(value) == uuid.UUID:
self._value = value
elif type(value) == str:
Expand Down
41 changes: 41 additions & 0 deletions cloudquery/sdk/scalar/vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from .scalar import Scalar
from typing import Type


class Vector:
    """List-like container of ``Scalar`` instances with type-checked appends.

    ``data`` holds the elements and ``type`` the element class. When no
    class is given, the first appended item determines it.
    """

    def __init__(self, type: Type[Scalar] = None, *args):
        """Create a vector, optionally typed and pre-filled with ``args``."""
        self.type = type
        self.data = []
        # Route initial items through append() so they are type-checked too.
        for initial in args:
            self.append(initial)

    def append(self, item):
        """Add ``item`` to the end of the vector.

        Raises:
            TypeError: if ``item`` is not a ``Scalar``, or is not an
                instance of this vector's element class.
        """
        if not isinstance(item, Scalar):
            raise TypeError("Item is not of type Scalar or its subclass")

        if self.type is None:
            # An untyped vector adopts the class of its first element.
            self.type = type(item)
        elif not isinstance(item, self.type):
            raise TypeError(f"Item is not of type {self.type.__name__}")

        self.data.append(item)

    def __eq__(self, other):
        """Return True for a ``Vector`` of equal length with pairwise-equal items."""
        if not isinstance(other, Vector) or len(self) != len(other):
            return False
        pairs = zip(self.data, other.data)
        return not any(mine != theirs for mine, theirs in pairs)

    def __getitem__(self, index):
        """Return the element(s) at ``index``, as for a plain list."""
        return self.data[index]

    def __len__(self):
        """Number of stored elements."""
        return len(self.data)

    def __repr__(self):
        """Describe the element class and the stored elements."""
        type_name = self.type.__name__ if self.type else "unknown type"
        return f"Vector of {type_name}: {self.data}"
43 changes: 43 additions & 0 deletions tests/scalar/list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from cloudquery.sdk.scalar import String, Bool, List


def test_list():
    """A fresh List equals another fresh one, and set() keeps every item."""
    element_type = type(String())
    strings = List(element_type)
    assert strings == List(element_type)

    items = [
        String(True, "a string"),
        String(True, "another string"),
        String(False),
    ]
    strings.set(items)
    assert len(strings) == 3


def test_list_eq():
    """Two lists set from equal items compare equal."""
    element_type = type(String())

    first = List(element_type)
    first.set(
        [String(True, "a string"), String(True, "another string"), String(False)]
    )

    second = List(element_type)
    second.set(
        [String(True, "a string"), String(True, "another string"), String(False)]
    )

    assert first == second


def test_list_ineq():
    """Lists holding different strings compare unequal."""
    element_type = type(String())

    first = List(element_type)
    first.set([String(True, "a string")])

    second = List(element_type)
    second.set([String(True, "another string")])

    assert first != second


def test_list_eq_invalid():
    """Two lists each holding a single ``String(False)`` compare equal."""
    element_type = type(String())

    first = List(element_type)
    first.set([String(False)])

    second = List(element_type)
    second.set([String(False)])

    assert first == second
45 changes: 45 additions & 0 deletions tests/scalar/vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from cloudquery.sdk.scalar import String, Bool, Vector


def test_vector_append():
    """An empty Vector equals another empty one; appends grow its length."""
    element_type = type(String())
    strings = Vector(element_type)
    assert strings == Vector(element_type)

    for item in (
        String(True, "a string"),
        String(True, "another string"),
        String(False),
    ):
        strings.append(item)
    assert len(strings) == 3


def test_vector_invalid_type_append():
    """Appending a Bool to a String-typed Vector must raise TypeError.

    Only TypeError is caught. A bare ``except:`` would also swallow the
    AssertionError used to signal failure, so the test could never fail.
    """
    s = type(String())
    v = Vector(s)
    b = Bool(True, True)
    try:
        v.append(b)
    except TypeError:
        # Expected: the vector rejects items of a different scalar class.
        pass
    else:
        raise AssertionError(
            "appending a Bool to a String vector did not raise TypeError"
        )
    # The rejected item must not have been stored.
    assert len(v) == 0


def test_vector_eq():
    """Vectors holding equal strings compare equal."""
    element_type = type(String())

    first = Vector(element_type)
    first.append(String(True, "a string"))

    second = Vector(element_type)
    second.append(String(True, "a string"))

    assert first == second


def test_vector_ineq():
    """Vectors holding different strings compare unequal."""
    element_type = type(String())

    first = Vector(element_type)
    first.append(String(True, "a string"))

    second = Vector(element_type)
    second.append(String(True, "another string"))

    assert first != second