Skip to content

Commit 2e89102

Browse files
committed
fixed issue #31
1 parent 593975a commit 2e89102

File tree

3 files changed

+7
-154
lines changed

3 files changed

+7
-154
lines changed

openomics/database/ontology.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Ontology(Dataset):
1313

1414
def __init__(self, path, file_resources=None, col_rename=None, npartitions=0, verbose=False):
1515
"""
16-
Manages dataset input processing from tables and constructs an ontology network from an obo file. The ontology
16+
Manages dataset input processing from tables and constructs an ontology network from an .obo file. The ontology
1717
network is G(V,E), where an edge e_ij from child i to parent j represents "node i is_a node j".
1818
1919
Args:

openomics/utils/GTF.py

Lines changed: 0 additions & 146 deletions
This file was deleted.

openomics/utils/read_gtf.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from collections import OrderedDict
1717
from os.path import exists
1818

19-
import dask
2019
import dask.dataframe as dd
2120
import numpy as np
2221
import pandas as pd
@@ -292,16 +291,16 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
292291
features (set or None): Ignore entries which don't correspond to one of the supplied features
293292
"""
294293
if npartitions:
295-
ddf = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
296-
ddf = ddf.reset_index(drop=False)
297-
ddf = ddf.set_index("index")
294+
df = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
295+
df = df.reset_index(drop=False)
296+
df = df.set_index("index")
298297

299-
attribute_values = ddf.pop("attribute")
298+
attribute_values = df.pop("attribute")
300299

301300
for column_name, values in expand_attribute_strings(attribute_values,
302301
usecols=restrict_attribute_columns).items():
303302
series = dd.from_array(np.array(values, dtype=np.str))
304-
ddf[column_name] = series
303+
df[column_name] = series
305304
else:
306305
df = parse_gtf(filepath_or_buffer, chunksize=chunksize, features=features)
307306

@@ -311,7 +310,7 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
311310
usecols=restrict_attribute_columns).items():
312311
df[column_name] = values
313312

314-
return ddf
313+
return df
315314

316315

317316
def read_gtf(filepath_or_buffer, npartitions=None, compression=None, expand_attribute_column=True,

0 commit comments

Comments
 (0)