Skip to content

Commit 843dfba

Browse files
Danilo PocciaDanilo Poccia
Danilo Poccia
authored and
Danilo Poccia
committed
Writeback removed - performance improvements
1 parent 76dd5ce commit 843dfba

File tree

2 files changed

+71
-2205
lines changed

2 files changed

+71
-2205
lines changed

yas3fs

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ class YAS3FS(LoggingMixIn, Operations):
518518
def __init__(self, options):
519519
# Some constants
520520
### self.http_listen_path_length = 30
521-
self.download_running = True
521+
self.running = True
522522
self.check_status_interval = 5.0 # Seconds, no need to configure that
523523

524524
# Initialization
@@ -741,7 +741,7 @@ class YAS3FS(LoggingMixIn, Operations):
741741
# Cleanup for unmount
742742
logger.info('File system unmount...')
743743

744-
self.download_running = False
744+
self.running = False
745745

746746
if self.http_listen_thread:
747747
self.httpd.shutdown() # To stop HTTP listen thread
@@ -924,7 +924,7 @@ class YAS3FS(LoggingMixIn, Operations):
924924
logger.info("num_entries, mem_size, disk_size, download_queue, prefetch_queue: %i, %i, %i, %i, %i"
925925
% (num_entries, mem_size, disk_size, self.download_queue.qsize(), self.prefetch_queue.qsize()))
926926

927-
if self.download_running:
927+
if self.running:
928928
for i in self.download_threads.keys():
929929
if not self.download_threads[i].is_alive():
930930
logger.debug("Download thread restarted!")
@@ -992,7 +992,8 @@ class YAS3FS(LoggingMixIn, Operations):
992992
(parent_path, dir) = os.path.split(path)
993993
logger.debug("parent_path '%s'" % (parent_path))
994994
with self.cache.get_lock(path):
995-
dirs = self.cache.get(parent_path, 'readdir')
995+
# dirs = self.cache.get(parent_path, 'readdir')
996+
dirs = self.readdir(parent_path)
996997
if dirs != None and dirs.count(dir) > 0:
997998
dirs.remove(dir)
998999

@@ -1017,16 +1018,27 @@ class YAS3FS(LoggingMixIn, Operations):
10171018
if key:
10181019
logger.debug("get_key from cache '%s'" % (path))
10191020
return key
1020-
logger.debug("get_key from S3 #1 '%s'" % (path))
1021-
key = self.s3_bucket.get_key(self.join_prefix(path))
1022-
if not key and path != '/':
1023-
full_path = path + '/'
1024-
logger.debug("get_key from S3 #2 '%s' '%s'" % (path, full_path))
1025-
key = self.s3_bucket.get_key(self.join_prefix(full_path))
1026-
if key:
1027-
logger.debug("get_key to cache '%s'" % (path))
1028-
self.cache.set(path, 'key', key)
1021+
look_on_S3 = False
1022+
if path == '/':
1023+
look_on_S3 = True
10291024
else:
1025+
(parent_path, file) = os.path.split(path)
1026+
dirs = self.readdir(parent_path)
1027+
if file in dirs: # We know it can be found on S3
1028+
look_on_S3 = True
1029+
if look_on_S3:
1030+
logger.debug("get_key from S3 #1 '%s'" % (path))
1031+
key = self.s3_bucket.get_key(self.join_prefix(path))
1032+
if not key and path != '/':
1033+
full_path = path + '/'
1034+
logger.debug("get_key from S3 #2 '%s' '%s'" % (path, full_path))
1035+
key = self.s3_bucket.get_key(self.join_prefix(full_path))
1036+
if key:
1037+
logger.debug("get_key to cache '%s'" % (path))
1038+
self.cache.set(path, 'key', key)
1039+
else:
1040+
logger.debug("get_key not on S3 '%s'" % (path))
1041+
if not key:
10301042
logger.debug("get_key no '%s'" % (path))
10311043
return key
10321044

@@ -1114,7 +1126,7 @@ class YAS3FS(LoggingMixIn, Operations):
11141126
if not (metadata_name == 'attr' and k == 'st_size')]) # For the size use the key.size
11151127
key.metadata[metadata_name] = s
11161128
if (not data) or (data and (not data.has('change'))):
1117-
logger.debug("writing metadata '%s' '%s'" % (path, key))
1129+
logger.debug("writing metadata '%s' '%s' S3" % (path, key))
11181130
md = key.metadata
11191131
md['Content-Type'] = key.content_type # Otherwise we lose the Content-Type with S3 Copy
11201132
key.copy(key.bucket.name, key.name, md, preserve_acl=False) # Do I need to preserve ACL?
@@ -1211,16 +1223,19 @@ class YAS3FS(LoggingMixIn, Operations):
12111223
full_path = path + '/'
12121224
else:
12131225
full_path = path # To manage '/' with an empty s3_prefix
1214-
if path != '/' or self.write_metadata:
1215-
k.key = self.join_prefix(full_path)
1216-
logger.debug("mkdir '%s' '%s' '%s' S3" % (path, mode, k))
1217-
k.set_contents_from_string('', headers={'Content-Type': 'application/x-directory'})
12181226
self.cache.set(path, 'key', k)
1219-
data.delete('change')
12201227
if path != '/':
12211228
self.cache.set(path, 'readdir', ['.', '..']) # the directory is empty
12221229
self.add_to_parent_readdir(path)
1230+
1231+
if path != '/' or self.write_metadata:
1232+
k.key = self.join_prefix(full_path)
1233+
logger.debug("mkdir '%s' '%s' '%s' S3" % (path, mode, k))
1234+
k.set_contents_from_string('', headers={'Content-Type': 'application/x-directory'})
1235+
data.delete('change')
1236+
if path != '/': ### Do I need this???
12231237
self.publish(['mkdir', path])
1238+
12241239
return 0
12251240

12261241
def symlink(self, path, link):
@@ -1240,7 +1255,7 @@ class YAS3FS(LoggingMixIn, Operations):
12401255
attr['st_ctime'] = now
12411256
attr['st_size'] = 0
12421257
attr['st_mode'] = (stat.S_IFLNK | 0755)
1243-
self.cache.delete(path)
1258+
self.cache.delete(path)
12441259
self.cache.add(path)
12451260
if self.cache_on_disk > 0:
12461261
data = FSData(self.cache, 'mem', path) # New files (almost) always cache in mem - is it ok ???
@@ -1255,12 +1270,14 @@ class YAS3FS(LoggingMixIn, Operations):
12551270
self.write(path, link, 0)
12561271
data.close()
12571272
k.key = self.join_prefix(path)
1258-
logger.debug("symlink '%s' '%s' '%s' S3" % (path, link, k))
1259-
k.set_contents_from_string(link, headers={'Content-Type': 'application/x-symlink'})
12601273
self.cache.set(path, 'key', k)
1261-
data.delete('change')
12621274
self.add_to_parent_readdir(path)
1263-
self.publish(['symlink', path])
1275+
1276+
logger.debug("symlink '%s' '%s' '%s' S3" % (path, link, k))
1277+
k.set_contents_from_string(link, headers={'Content-Type': 'application/x-symlink'})
1278+
data.delete('change')
1279+
self.publish(['symlink', path])
1280+
12641281
return 0
12651282

12661283
def check_data(self, path):
@@ -1323,7 +1340,7 @@ class YAS3FS(LoggingMixIn, Operations):
13231340
self.download_queue.put(option_list)
13241341

13251342
def download(self, prefetch=False):
1326-
while self.download_running:
1343+
while self.running:
13271344
try:
13281345
if prefetch:
13291346
(path, start, end) = self.prefetch_queue.get(True, 1) # 1 second time-out
@@ -1469,16 +1486,22 @@ class YAS3FS(LoggingMixIn, Operations):
14691486
if not k:
14701487
logger.debug("rmdir '%s' ENOENT" % (path))
14711488
raise FuseOSError(errno.ENOENT)
1472-
full_path = self.join_prefix(path + '/')
1473-
key_list = self.s3_bucket.list(full_path) # Don't need to set a delimiter here
1474-
for l in key_list:
1475-
if l.name != full_path:
1476-
logger.debug("rmdir '%s' ENOTEMPTY" % (path))
1477-
raise FuseOSError(errno.ENOTEMPTY)
1478-
k.delete()
1489+
if len(self.readdir(path)) > 2:
1490+
logger.debug("rmdir '%s' ENOTEMPTY" % (path))
1491+
raise FuseOSError(errno.ENOTEMPTY)
1492+
#full_path = self.join_prefix(path + '/')
1493+
#key_list = self.s3_bucket.list(full_path) # Don't need to set a delimiter here
1494+
#for l in key_list:
1495+
# if l.name != full_path:
1496+
# logger.debug("rmdir '%s' ENOTEMPTY" % (path))
1497+
# raise FuseOSError(errno.ENOTEMPTY)
1498+
1499+
logger.debug("rmdir '%s' '%s' S3" % (path, k))
1500+
k.delete()
1501+
self.publish(['rmdir', path])
1502+
14791503
self.cache.reset(path) # Cache invalidation
14801504
self.remove_from_parent_readdir(path)
1481-
self.publish(['rmdir', path])
14821505
return 0
14831506

14841507
def truncate(self, path, size):
@@ -1558,7 +1581,8 @@ class YAS3FS(LoggingMixIn, Operations):
15581581
md['Content-Type'] = key.content_type # Otherwise we lose the Content-Type with S3 Copy
15591582
key.copy(key.bucket.name, target, md, preserve_acl=False) # Do I need to preserve ACL?
15601583
key.delete()
1561-
self.publish(['rename', source_path, target_path])
1584+
self.publish(['rename', source_path, target_path])
1585+
15621586
self.remove_from_parent_readdir(path)
15631587
self.add_to_parent_readdir(new_path)
15641588

@@ -1605,10 +1629,12 @@ class YAS3FS(LoggingMixIn, Operations):
16051629
logger.debug("unlink '%s' ENOENT" % (path))
16061630
raise FuseOSError(errno.ENOENT)
16071631
if k:
1632+
logger.debug("unlink '%s' '%s' S3" % (path, k))
16081633
k.delete()
1634+
self.publish(['unlink', path])
1635+
16091636
self.cache.reset(path)
16101637
self.remove_from_parent_readdir(path)
1611-
self.publish(['unlink', path])
16121638
return 0
16131639

16141640
def create(self, path, mode, fi=None):
@@ -1737,20 +1763,25 @@ class YAS3FS(LoggingMixIn, Operations):
17371763
old_size = 0
17381764
else:
17391765
old_size = k.size
1766+
17401767
written = False
17411768
if self.multipart_num > 0:
17421769
full_size = attr['st_size']
17431770
if full_size > self.multipart_size:
1744-
k = self.multipart_upload(k.name, data, full_size,
1745-
headers={'Content-Type': type}, metadata=k.metadata)
1746-
k = self.get_key(path, cache=False)
1771+
logger.debug("flush '%s' '%s' '%s' '%s' S3" % (path, fh, k, type))
1772+
new_k = self.multipart_upload(k.name, data, full_size,
1773+
headers={'Content-Type': type}, metadata=k.metadata)
1774+
new_k = self.get_key(path, cache=False)
1775+
etag = new_k.etag[1:-1]
17471776
written = True
17481777
if not written:
17491778
logger.debug("flush '%s' '%s' '%s' '%s' S3" % (path, fh, k, type))
17501779
k.set_contents_from_file(data.content, headers={'Content-Type': type})
1751-
data.update_etag(k.etag[1:-1])
1780+
etag = k.etag[1:-1]
1781+
data.update_etag(etag)
17521782
data.delete('change')
1753-
self.publish(['flush', path, k.etag[1:-1]])
1783+
self.publish(['flush', path, etag])
1784+
17541785
return 0
17551786

17561787
def multipart_upload(self, key_path, data, full_size, headers, metadata):

0 commit comments

Comments
 (0)