Skip to content

Commit 09e7d3f

Browse files
committed
For lazyarrays, keep the file handle open
1 parent 6108a5b commit 09e7d3f

File tree

2 files changed

+25
-10
lines changed

2 files changed

+25
-10
lines changed

servicex/data_conversions.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,9 @@ async def _convert_root_to_awkward(self, file: Path):
145145
Note:
146146
147147
- Work is done on a second thread.
148-
- Pandas is only imported if this is called.
148+
- Awkward is only imported if this is called.
149+
- A LazyArray is returned, so it isn't completely loaded into memory. That also means this
150+
will leak file handles, because the file has to be left open.
149151
150152
'''
151153
from numpy import ndarray
@@ -155,11 +157,8 @@ def do_the_work(file: Path) -> Dict[Union[str, bytes], Union[ndarray, JaggedArra
155157
import uproot
156158

157159
f_in = uproot.open(file)
158-
try:
159-
r = f_in[f_in.keys()[0]]
160-
return r.lazyarrays() # type: ignore
161-
finally:
162-
f_in._context.source.close()
160+
r = f_in[f_in.keys()[0]]
161+
return r.lazyarrays() # type: ignore
163162

164163
return await asyncio.wrap_future(_conversion_pool.submit(do_the_work, file))
165164

tests/test_data_conversions.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,21 @@
44
import pandas as pd
55

66

7+
def check_awkward_accessible(col):
8+
'Check to make sure we can look at every item in column'
9+
col.flatten()
10+
11+
12+
def check_pandas_accessible(col):
13+
assert len(col.array) > 0
14+
15+
716
@pytest.mark.asyncio
817
async def test_root_to_pandas(good_root_file_path):
918
df = await DataConverterAdaptor('root').convert_to_pandas(good_root_file_path)
1019
assert isinstance(df, pd.DataFrame)
1120
assert len(df) == 283458
12-
21+
check_pandas_accessible(df['JetPt'])
1322

1423
@pytest.mark.asyncio
1524
async def test_root_to_pandas_default(good_root_file_path):
@@ -30,18 +39,21 @@ async def test_parquet_to_pandas(good_uproot_file_path):
3039
df = await DataConverterAdaptor('parquet').convert_to_pandas(good_uproot_file_path)
3140
assert isinstance(df, pd.DataFrame)
3241
assert len(df) == 115714
42+
check_pandas_accessible(df['JetPT'])
3343

3444

3545
@pytest.mark.asyncio
3646
async def test_parquet_to_awkward(good_uproot_file_path):
3747
df = await DataConverterAdaptor('parquet').convert_to_awkward(good_uproot_file_path)
3848
assert len(df['JetPT']) == 115714
49+
check_awkward_accessible(df['JetPT'])
3950

4051

4152
@pytest.mark.asyncio
4253
async def test_root_to_awkward(good_root_file_path):
4354
df = await DataConverterAdaptor('root').convert_to_awkward(good_root_file_path)
4455
assert len(df['JetPt']) == 283458
56+
check_awkward_accessible(df['JetPt'])
4557

4658

4759
@pytest.mark.asyncio
@@ -70,6 +82,7 @@ def load_df():
7082
combined = DataConverterAdaptor('root').combine_pandas([df1, df2])
7183

7284
assert len(combined) == len(df1) + len(df2)
85+
check_pandas_accessible(combined['JetPt'])
7386

7487

7588
def test_combine_pandas_from_parquet(good_uproot_file_path):
@@ -84,22 +97,24 @@ def load_df():
8497
combined = DataConverterAdaptor('root').combine_pandas([df1, df2])
8598

8699
assert len(combined) == len(df1) + len(df2)
100+
check_pandas_accessible(combined['JetPT'])
87101

88102

89103
def test_combine_awkward_from_root(good_root_file_path):
90104
'Load a dataframe from root files and make sure that they work when we ask them to combine'
91105
def load_df():
92106
import uproot
93-
with uproot.open(good_root_file_path) as f_in:
94-
df = f_in[f_in.keys()[0]].lazyarrays() # type: ignore
95-
return df
107+
f_in = uproot.open(good_root_file_path)
108+
df = f_in[f_in.keys()[0]].lazyarrays() # type: ignore
109+
return df
96110

97111
df1 = load_df()
98112
df2 = load_df()
99113

100114
combined = DataConverterAdaptor('root').combine_awkward([df1, df2])
101115

102116
assert len(combined) == len(df1) + len(df2)
117+
check_awkward_accessible(combined['JetPt'])
103118

104119

105120
def test_combine_awkward_from_parquet(good_uproot_file_path):
@@ -114,3 +129,4 @@ def load_df():
114129
combined = DataConverterAdaptor('root').combine_awkward([df1, df2])
115130

116131
assert len(combined) == len(df1) + len(df2)
132+
check_awkward_accessible(combined['JetPT'])

0 commit comments

Comments
 (0)