Skip to content

Commit 85623b7

Browse files
committed
Modify getsize to return total size, not just the top level
1 parent 9d046ea commit 85623b7

File tree

3 files changed

+44
-44
lines changed

3 files changed

+44
-44
lines changed

docs/release.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ Enhancements
2424
By :user:`Deepak Cherian <dcherian>`.
2525

2626

27+
Bug fixes
28+
~~~~~~~~~
29+
30+
* ``getsize`` now returns the total size of all nested arrays, not just the size of the arrays in the current folder.
31+
By :user:`Ben Jeffery <benjeffery>` :issue:`253`.
32+
33+
2734
Docs
2835
~~~~
2936

zarr/storage.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,15 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
269269
# also include zarr.json?
270270
# members += ['zarr.json']
271271
else:
272-
members = listdir(store, path)
273-
prefix = _path_to_prefix(path)
274-
members = [prefix + k for k in members]
272+
to_visit = [path]
273+
members = []
274+
while to_visit:
275+
print(to_visit)
276+
current_path = to_visit.pop()
277+
current_members = listdir(store, current_path)
278+
prefix = _path_to_prefix(current_path)
279+
members.extend([prefix + k for k in current_members])
280+
to_visit.extend([prefix + k for k in current_members])
275281
for k in members:
276282
try:
277283
v = store[k]
@@ -971,8 +977,12 @@ def getsize(self, path: Path = None):
971977
elif isinstance(value, self.cls):
972978
# total size for directory
973979
size = 0
974-
for v in value.values():
975-
if not isinstance(v, self.cls):
980+
to_visit = list(value.values())
981+
while to_visit:
982+
v = to_visit.pop()
983+
if isinstance(v, self.cls):
984+
to_visit.extend(v.values())
985+
else:
976986
size += buffer_size(v)
977987
return size
978988

@@ -1269,9 +1279,10 @@ def getsize(self, path=None):
12691279
return os.path.getsize(fs_path)
12701280
elif os.path.isdir(fs_path):
12711281
size = 0
1272-
for child in scandir(fs_path):
1273-
if child.is_file():
1274-
size += child.stat().st_size
1282+
for root, dirs, files in os.walk(fs_path):
1283+
for file in files:
1284+
file_path = os.path.join(root, file)
1285+
size += os.path.getsize(file_path)
12751286
return size
12761287
else:
12771288
return 0
@@ -1903,29 +1914,19 @@ def listdir(self, path=None):
19031914
def getsize(self, path=None):
19041915
path = normalize_storage_path(path)
19051916
with self.mutex:
1906-
children = self.listdir(path)
1907-
if children:
1908-
size = 0
1909-
for child in children:
1910-
if path:
1911-
name = path + "/" + child
1912-
else:
1913-
name = child
1914-
try:
1915-
info = self.zf.getinfo(name)
1916-
except KeyError:
1917-
pass
1918-
else:
1919-
size += info.compress_size
1920-
return size
1921-
elif path:
1917+
to_visit = [path] if path else self.listdir(path)
1918+
total_size = 0
1919+
while to_visit:
1920+
current_path = to_visit.pop()
19221921
try:
1923-
info = self.zf.getinfo(path)
1924-
return info.compress_size
1922+
info = self.zf.getinfo(current_path)
1923+
total_size += info.compress_size
19251924
except KeyError:
1926-
return 0
1927-
else:
1928-
return 0
1925+
children = self.listdir(current_path)
1926+
for child in children:
1927+
full_path = current_path + "/" + child if current_path else child
1928+
to_visit.append(full_path)
1929+
return total_size
19291930

19301931
def clear(self):
19311932
if self.mode == "r":
@@ -2488,6 +2489,8 @@ def listdir(self, path: Path = None):
24882489
return listing
24892490

24902491
def getsize(self, path=None) -> int:
2492+
print("WYF")
2493+
print(self._store, path)
24912494
return getsize(self._store, path=path)
24922495

24932496
def _pop_value(self):
@@ -2745,10 +2748,9 @@ def getsize(self, path=None):
27452748
size = self.cursor.execute(
27462749
"""
27472750
SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
2748-
WHERE k LIKE (? || "%") AND
2749-
0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/")
2751+
WHERE k LIKE (? || "%")
27502752
""",
2751-
(path, path),
2753+
(path,)
27522754
)
27532755
for (s,) in size:
27542756
return s

zarr/tests/test_storage.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -366,19 +366,10 @@ def test_hierarchy(self):
366366

367367
# test getsize (optional)
368368
if hasattr(store, "getsize"):
369-
# TODO: proper behavior of getsize?
370-
# v3 returns size of all nested arrays, not just the
371-
# size of the arrays in the current folder.
372-
if self.version == 2:
373-
assert 6 == store.getsize()
374-
else:
375-
assert 15 == store.getsize()
369+
assert 15 == store.getsize()
376370
assert 3 == store.getsize("a")
377371
assert 3 == store.getsize("b")
378-
if self.version == 2:
379-
assert 3 == store.getsize("c")
380-
else:
381-
assert 9 == store.getsize("c")
372+
assert 9 == store.getsize("c")
382373
assert 3 == store.getsize("c/d")
383374
assert 6 == store.getsize("c/e")
384375
assert 3 == store.getsize("c/e/f")
@@ -2256,7 +2247,7 @@ def test_getsize():
22562247
store["foo"] = b"aaa"
22572248
store["bar"] = b"bbbb"
22582249
store["baz/quux"] = b"ccccc"
2259-
assert 7 == getsize(store)
2250+
assert 12 == getsize(store)
22602251
assert 5 == getsize(store, "baz")
22612252

22622253
store = KVStore(dict())

0 commit comments

Comments
 (0)