|
30 | 30 | from collections import OrderedDict
|
31 | 31 | from collections.abc import MutableMapping
|
32 | 32 | from functools import lru_cache
|
33 |
| -from os import scandir |
34 | 33 | from pickle import PicklingError
|
35 | 34 | from threading import Lock, RLock
|
36 | 35 | from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any
|
@@ -270,9 +269,15 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
|
270 | 269 | # also include zarr.json?
|
271 | 270 | # members += ['zarr.json']
|
272 | 271 | else:
|
273 |
| - members = listdir(store, path) |
274 |
| - prefix = _path_to_prefix(path) |
275 |
| - members = [prefix + k for k in members] |
| 272 | + to_visit = [path] |
| 273 | + members = [] |
| 274 | + while to_visit: |
| 275 | + print(to_visit) |
| 276 | + current_path = to_visit.pop() |
| 277 | + current_members = listdir(store, current_path) |
| 278 | + prefix = _path_to_prefix(current_path) |
| 279 | + members.extend([prefix + k for k in current_members]) |
| 280 | + to_visit.extend([prefix + k for k in current_members]) |
276 | 281 | for k in members:
|
277 | 282 | try:
|
278 | 283 | v = store[k]
|
@@ -976,8 +981,12 @@ def getsize(self, path: Path = None):
|
976 | 981 | elif isinstance(value, self.cls):
|
977 | 982 | # total size for directory
|
978 | 983 | size = 0
|
979 |
| - for v in value.values(): |
980 |
| - if not isinstance(v, self.cls): |
| 984 | + to_visit = list(value.values()) |
| 985 | + while to_visit: |
| 986 | + v = to_visit.pop() |
| 987 | + if isinstance(v, self.cls): |
| 988 | + to_visit.extend(v.values()) |
| 989 | + else: |
981 | 990 | size += buffer_size(v)
|
982 | 991 | return size
|
983 | 992 |
|
@@ -1274,9 +1283,13 @@ def getsize(self, path=None):
|
1274 | 1283 | return os.path.getsize(fs_path)
|
1275 | 1284 | elif os.path.isdir(fs_path):
|
1276 | 1285 | size = 0
|
1277 |
| - for child in scandir(fs_path): |
1278 |
| - if child.is_file(): |
1279 |
| - size += child.stat().st_size |
| 1286 | + for root, _, files in os.walk(fs_path): |
| 1287 | + # Include the size of the directory itself, as this can be substantial |
| 1288 | + # for directories with many files. |
| 1289 | + size += os.path.getsize(root) |
| 1290 | + for file in files: |
| 1291 | + file_path = os.path.join(root, file) |
| 1292 | + size += os.path.getsize(file_path) |
1280 | 1293 | return size
|
1281 | 1294 | else:
|
1282 | 1295 | return 0
|
@@ -1921,29 +1934,19 @@ def listdir(self, path=None):
|
1921 | 1934 | def getsize(self, path=None):
|
1922 | 1935 | path = normalize_storage_path(path)
|
1923 | 1936 | with self.mutex:
|
1924 |
| - children = self.listdir(path) |
1925 |
| - if children: |
1926 |
| - size = 0 |
1927 |
| - for child in children: |
1928 |
| - if path: |
1929 |
| - name = path + "/" + child |
1930 |
| - else: |
1931 |
| - name = child |
1932 |
| - try: |
1933 |
| - info = self.zf.getinfo(name) |
1934 |
| - except KeyError: |
1935 |
| - pass |
1936 |
| - else: |
1937 |
| - size += info.compress_size |
1938 |
| - return size |
1939 |
| - elif path: |
| 1937 | + to_visit = [path] if path else self.listdir(path) |
| 1938 | + total_size = 0 |
| 1939 | + while to_visit: |
| 1940 | + current_path = to_visit.pop() |
1940 | 1941 | try:
|
1941 |
| - info = self.zf.getinfo(path) |
1942 |
| - return info.compress_size |
| 1942 | + info = self.zf.getinfo(current_path) |
| 1943 | + total_size += info.compress_size |
1943 | 1944 | except KeyError:
|
1944 |
| - return 0 |
1945 |
| - else: |
1946 |
| - return 0 |
| 1945 | + children = self.listdir(current_path) |
| 1946 | + for child in children: |
| 1947 | + full_path = current_path + "/" + child if current_path else child |
| 1948 | + to_visit.append(full_path) |
| 1949 | + return total_size |
1947 | 1950 |
|
1948 | 1951 | def clear(self):
|
1949 | 1952 | if self.mode == "r":
|
@@ -2527,6 +2530,8 @@ def listdir(self, path: Path = None):
|
2527 | 2530 | return listing
|
2528 | 2531 |
|
2529 | 2532 | def getsize(self, path=None) -> int:
|
| 2533 | + print("WYF") |
| 2534 | + print(self._store, path) |
2530 | 2535 | return getsize(self._store, path=path)
|
2531 | 2536 |
|
2532 | 2537 | def _pop_value(self):
|
@@ -2795,10 +2800,9 @@ def getsize(self, path=None):
|
2795 | 2800 | size = self.cursor.execute(
|
2796 | 2801 | """
|
2797 | 2802 | SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
|
2798 |
| - WHERE k LIKE (? || "%") AND |
2799 |
| - 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") |
| 2803 | + WHERE k LIKE (? || "%") |
2800 | 2804 | """,
|
2801 |
| - (path, path), |
| 2805 | + (path,), |
2802 | 2806 | )
|
2803 | 2807 | for (s,) in size:
|
2804 | 2808 | return s
|
|
0 commit comments