Skip to content

Commit 33c5d37

Browse files
Added kwarg simplify_dtypes to simplify DataFrame returns.
- Added the kwarg `simplify_dtypes` to the functions `ledger`, `cols_operation`, `cols_operation_cumsum` and `cols_operation_balance_by_instrument`. This allows for dtype simplification (see pandas-dev/pandas#58543 (comment)). - Added some docstrings.
1 parent 14846a9 commit 33c5d37

File tree

1 file changed

+93
-11
lines changed

1 file changed

+93
-11
lines changed

lib/simple_portfolio_ledger/src/simple_portfolio_ledger.py

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -228,20 +228,48 @@ def ledger(
228228
cols_operation_balance_by_instrument=False,
229229
thousands_fmt_sep=False,
230230
thousands_fmt_decimals=1,
231-
):
231+
simplify_dtypes=True,
232+
) -> pd.DataFrame:
233+
"""Returns The Ledger, with optional additional information.
234+
235+
Args:
236+
cols_operation (bool, optional): Whether to add cols_operation to The Ledger. Defaults to False.
237+
cols_operation_cumsum (bool, optional): Whether to add cols_operation_cumsum to The Ledger. Defaults to False.
238+
cols_operation_balance_by_instrument (bool, optional): Whether to add cols_operation_balance_by_instrument to The Ledger. Defaults to False.
239+
thousands_fmt_sep (bool, optional): Add a thousands separator. Defaults to False.
240+
thousands_fmt_decimals (int, optional): Decimals to print, used only when thousands_fmt_sep is set to True. Defaults to 1.
241+
simplify_dtypes (bool, optional): Whether to simplify dtypes, for instance converting float64 to int64 if no decimals are present. Although it uses `DataFrame.convert_dtypes()` internally, the result is not left in a dtype that supports pd.NA. See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339 . Warning: Might have a performance impact if True. Defaults to True.
242+
243+
Returns:
244+
pd.DataFrame: Returns The Ledger, with optional additional information.
245+
"""
232246
if len(self._ledger_df) == 0:
233247
warnings.warn('WARNING: Ledger is empty, showing only basic structure.')
234248

235-
dfs_toconcat = [self._ledger_df]
249+
the_ledger = self._ledger_df
250+
251+
if simplify_dtypes is True:
252+
with pd.option_context('future.no_silent_downcasting', True):
253+
the_ledger = (
254+
the_ledger
255+
# See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339
256+
.astype('object')
257+
.convert_dtypes()
258+
.astype('object')
259+
.replace(pd.NA, float('nan'))
260+
.infer_objects()
261+
)
262+
263+
dfs_toconcat = [the_ledger]
236264

237265
if cols_operation is True:
238-
tmp = self.cols_operation()
266+
tmp = self.cols_operation(simplify_dtypes=simplify_dtypes)
239267
dfs_toconcat.append(tmp)
240268
if cols_operation_cumsum is True:
241-
tmp = self.cols_operation_cumsum()
269+
tmp = self.cols_operation_cumsum(simplify_dtypes=simplify_dtypes)
242270
dfs_toconcat.append(tmp)
243271
if cols_operation_balance_by_instrument is True:
244-
tmp = self.cols_operation_balance_by_instrument()
272+
tmp = self.cols_operation_balance_by_instrument(simplify_dtypes=simplify_dtypes)
245273
dfs_toconcat.append(tmp)
246274

247275
to_return = (
@@ -271,11 +299,12 @@ def ledger(
271299
return to_return
272300

273301
@_deco_check_ledger_for_cols
274-
def cols_operation(self, show_instr_accnt=False):
302+
def cols_operation(self, show_instr_accnt=False, simplify_dtypes=True) -> pd.DataFrame:
275303
"""Returns a dataframe with 1 column per operation.
276304
277305
Args:
278306
show_instr_accnt (bool, optional): Whether or not to show the instrument and the account. Defaults to False.
307+
simplify_dtypes (bool, optional): Whether to simplify dtypes, for instance converting float64 to int64 if no decimals are present. Although it uses `DataFrame.convert_dtypes()` internally, the result is not left in a dtype that supports pd.NA. See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339 . Warning: Might have a performance impact if True. Defaults to True.
279308
280309
Returns:
281310
pd.DataFrame: Returns a dataframe with 1 column per operation.
@@ -310,15 +339,33 @@ def cols_operation(self, show_instr_accnt=False):
310339
.infer_objects()
311340
)
312341

342+
if simplify_dtypes is True:
343+
with pd.option_context('future.no_silent_downcasting', True):
344+
to_return = (
345+
to_return
346+
# See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339
347+
.astype('object')
348+
.convert_dtypes()
349+
.astype('object')
350+
.replace(pd.NA, float('nan'))
351+
.infer_objects()
352+
)
353+
313354
if show_instr_accnt is True:
314355
return to_return[['instrument', 'account', *sorted(self._ops_names)]]
315356
else:
316357
return to_return[[*sorted(self._ops_names)]]
317358

318359
@_deco_check_ledger_for_cols
319-
def cols_operation_cumsum(self, show_instr_accnt=False):
320-
"""
321-
Add one column per operation but do a cumsum per instrument/operation.
360+
def cols_operation_cumsum(self, show_instr_accnt=False, simplify_dtypes=True) -> pd.DataFrame:
361+
"""Returns a DataFrame with one column per operation, with a cumsum computed per instrument/account.
362+
363+
Args:
364+
show_instr_accnt (bool, optional): Whether or not to show the instrument and the account. Defaults to False.
365+
simplify_dtypes (bool, optional): Whether to simplify dtypes, for instance converting float64 to int64 if no decimals are present. Although it uses `DataFrame.convert_dtypes()` internally, the result is not left in a dtype that supports pd.NA. See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339 . Warning: Might have a performance impact if True. Defaults to True.
366+
367+
Returns:
368+
pd.DataFrame: A DataFrame with one column per operation, with a cumsum computed per instrument/operation/account.
322369
"""
323370

324371
# List of columns to return, SORTED by self._ops_names
@@ -379,6 +426,18 @@ def cols_operation_cumsum(self, show_instr_accnt=False):
379426
.infer_objects()
380427
)
381428

429+
if simplify_dtypes is True:
430+
with pd.option_context('future.no_silent_downcasting', True):
431+
to_return = (
432+
to_return
433+
# See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339
434+
.astype('object')
435+
.convert_dtypes()
436+
.astype('object')
437+
.replace(pd.NA, float('nan'))
438+
.infer_objects()
439+
)
440+
382441
if show_instr_accnt is True:
383442
return to_return[
384443
['instrument', 'account', *ops_cumsum_names, 'cumsum held', 'cumsum invested']
@@ -387,7 +446,7 @@ def cols_operation_cumsum(self, show_instr_accnt=False):
387446
return to_return[[*ops_cumsum_names, 'cumsum held', 'cumsum invested']]
388447

389448
@staticmethod
390-
def _cols_operation_balance_by_instrument_for_group(group_df, new_columns):
449+
def _cols_operation_balance_by_instrument_for_group(group_df, new_columns) -> pd.DataFrame:
391450
"""
392451
WARNING: not to be called by itself. It needs a grouping per instrument.
393452
"""
@@ -557,7 +616,18 @@ def _cols_operation_balance_by_instrument_for_group(group_df, new_columns):
557616
return df
558617

559618
@_deco_check_ledger_for_cols
560-
def cols_operation_balance_by_instrument(self, show_instr_accnt=False):
619+
def cols_operation_balance_by_instrument(
620+
self, show_instr_accnt=False, simplify_dtypes=True
621+
) -> pd.DataFrame:
622+
"""Returns a DataFrame with a balance per operation per instrument/account.
623+
624+
Args:
625+
show_instr_accnt (bool, optional): Whether or not to show the instrument and the account. Defaults to False.
626+
simplify_dtypes (bool, optional): Whether to simplify dtypes, for instance converting float64 to int64 if no decimals are present. Although it uses `DataFrame.convert_dtypes()` internally, the result is not left in a dtype that supports pd.NA. See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339 . Warning: Might have a performance impact if True. Defaults to True.
627+
628+
Returns:
629+
pd.DataFrame: Returns a DataFrame with a balance per operation per instrument/account.
630+
"""
561631

562632
new_columns = [
563633
'balance deposit',
@@ -611,6 +681,18 @@ def cols_operation_balance_by_instrument(self, show_instr_accnt=False):
611681
# Try to convert columns whose dtype is object to a type like int64 or float64
612682
.infer_objects()
613683
)
684+
685+
if simplify_dtypes is True:
686+
with pd.option_context('future.no_silent_downcasting', True):
687+
to_return = (
688+
to_return
689+
# See https://github.com/pandas-dev/pandas/issues/58543#issuecomment-2101240339
690+
.astype('object')
691+
.convert_dtypes()
692+
.astype('object')
693+
.replace(pd.NA, float('nan'))
694+
.infer_objects()
695+
)
614696

615697
if show_instr_accnt is True:
616698
return to_return[['instrument', 'account', *new_columns]]

0 commit comments

Comments
 (0)