Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ Other examples of URLs that ``smart_open`` accepts::
s3://my_key:my_secret@my_server:my_port@my_bucket/my_key
gs://my_bucket/my_blob
azure://my_bucket/my_blob
oss://my_bucket/my_key
oss://my_key:my_secret@my_bucket/my_key
oss://my_key:my_secret@my_endpoint@my_bucket/my_key
hdfs:///path/file
hdfs://path/file
webhdfs://host:port/path/file
Expand Down Expand Up @@ -121,6 +124,7 @@ You can install these dependencies explicitly using::
pip install smart_open[azure] # Install Azure deps
pip install smart_open[gcs] # Install GCS deps
pip install smart_open[s3] # Install S3 deps
pip install smart_open[oss] # Install OSS deps

Or, if you don't mind installing a large number of third party libraries, you can install all dependencies using::

Expand Down Expand Up @@ -216,6 +220,21 @@ For the sake of simplicity, the examples below assume you have all the dependenc
with open('azure://mycontainer/my_file.txt', 'wb', transport_params=transport_params) as fout:
fout.write(b'hello world')

# stream content from Alicloud OSS
with open('oss://my_ak:my_sk@my_endpoint@my_bucket/robots.txt', 'rb') as fin:
for line in fin:
print(repr(line.decode('utf-8')))
offset = fin.seek(0) # seek to the beginning
print(fin.read(4))

endpoint = 'https://oss-cn-hangzhou.aliyuncs.com'
oss_client = oss2.Bucket(oss2.Auth('my_ak', 'my_sk'), endpoint, 'my_bucket')
url = 'oss://my_bucket/test.txt'
with open(url, 'wb', transport_params={'client': oss_client}) as fout:
bytes_written = fout.write(b'hello world!')
print(bytes_written)


Compression Handling
--------------------

Expand Down Expand Up @@ -289,6 +308,7 @@ Transport-specific Options
- WebHDFS
- GCS
- Azure Blob Storage
- Alicloud OSS

Each option involves setting up its own set of parameters.
For example, for accessing S3, you often need to set up authentication, like API keys or a profile name.
Expand Down
104 changes: 104 additions & 0 deletions integration-tests/test_oss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import contextlib
import os
import random
import string
import uuid

from pytest import fixture
import oss2

import smart_open

# Settings for the integration test, read from the environment at import time.
# The four variables below are mandatory: importing this module fails with an
# AssertionError naming the missing variable.

# Name of the bucket the temporary test URLs point into.
_BUCKET = os.environ.get('OSS_BUCKET')
assert _BUCKET is not None, 'please set the OSS_BUCKET environment variable'

# Key prefix under which the randomly named temporary keys are created.
_KEY = os.environ.get('OSS_KEY')
assert _KEY is not None, 'please set the OSS_KEY environment variable'

# Credential pair handed to oss2.Auth (presumably access key id / secret).
_AK = os.environ.get('OSS_AK')
assert _AK is not None, 'please set the OSS_AK environment variable'

_SK = os.environ.get('OSS_SK')
assert _SK is not None, 'please set the OSS_SK environment variable'

# Optional: falls back to the default endpoint below when OSS_ENDPOINT is
# unset, so the value can never be None and needs no assertion.
_ENDPOINT = os.environ.get('OSS_ENDPOINT', 'https://oss-cn-hangzhou.aliyuncs.com')


def get_uuid():
    """Return the first six characters of a freshly generated UUID4."""
    # The first hyphen of the canonical form sits at index 8, so the first
    # six characters of the plain hex form are the same ones.
    return uuid.uuid4().hex[:6]


def _get_oss_bucket(bucket_name, endpoint, ak, sk):
    """Build an ``oss2.Bucket`` for ``bucket_name`` at ``endpoint``.

    ``ak`` and ``sk`` are passed to ``oss2.Auth`` to create the credentials.
    """
    credentials = oss2.Auth(ak, sk)
    return oss2.Bucket(credentials, endpoint, bucket_name)


def _get_obj_iter(oss_bucket, prefix):
    """Yield the ``key`` of every entry that OSS lists for ``prefix``.

    Entries are listed with ``oss2.ObjectIterator`` using ``'/'`` as the
    delimiter and 100 keys per request.

    :param oss_bucket: bucket client handed to ``oss2.ObjectIterator``.
    :param prefix: key prefix to list.

    NOTE(review): because of ``delimiter='/'`` the listing presumably does not
    descend below the next ``/`` after ``prefix`` -- confirm against the oss2
    documentation if recursive listing is expected.
    """
    listing = oss2.ObjectIterator(
        oss_bucket,
        prefix=prefix,
        delimiter='/',
        max_keys=100,
    )
    for info in listing:
        try:
            yield info.key
        except (oss2.exceptions.NoSuchKey, oss2.exceptions.NotFound):
            # Only covers errors raised while reading ``info.key`` or thrown
            # into the generator at the ``yield``; errors from the listing
            # itself surface in the ``for`` statement and propagate.
            continue


def _delete_obj_by_prefix(oss_bucket, prefix):
    """Delete from ``oss_bucket`` every key that ``_get_obj_iter`` yields for ``prefix``."""
    keys = _get_obj_iter(oss_bucket, prefix)
    for key in keys:
        oss_bucket.delete_object(key)


#
# https://stackoverflow.com/questions/13484726/safe-enough-8-character-short-unique-random-string
#
def _random_string(length=8):
alphabet = string.ascii_lowercase + string.digits
return ''.join(random.choices(alphabet, k=length))


@fixture
def oss_bucket():
    """Pytest fixture returning the bucket client built from the module-level settings."""
    return _get_oss_bucket(bucket_name=_BUCKET, endpoint=_ENDPOINT, ak=_AK, sk=_SK)


@contextlib.contextmanager
def temporary(oss_bucket):
    """Yield a URL that can be used for temporary writing.

    The URL points at a randomly named key under ``_KEY`` in ``_BUCKET``.
    On exit, every key listed for that name is deleted, including when the
    body of the ``with`` block raises.

    :param oss_bucket: bucket client used to delete the temporary content.
    """
    key = '%s/%s' % (_KEY, _random_string())
    try:
        yield 'oss://%s/%s' % (_BUCKET, key)
    finally:
        # Run the cleanup in ``finally`` so a failing test body does not
        # leave objects behind in the bucket.
        _delete_obj_by_prefix(oss_bucket, prefix=key)


def _test_case(function):
def inner(benchmark, oss_bucket):
with temporary(oss_bucket) as uri:
return function(benchmark, oss_bucket, uri)

return inner


def write_read(uri, content, write_mode, read_mode, encoding=None, oss_bucket=None, **kwargs):
    """Write ``content`` to ``uri`` with smart_open, read it back and return what was read.

    Extra keyword arguments are forwarded as transport parameters, and
    ``oss_bucket`` is always passed as the ``client`` transport parameter.
    The same ``encoding`` and transport parameters are used for both the
    write and the read.
    """
    transport_params = {**kwargs, 'client': oss_bucket}

    with smart_open.open(uri, write_mode, encoding=encoding, transport_params=transport_params) as sink:
        sink.write(content)

    with smart_open.open(uri, read_mode, encoding=encoding, transport_params=transport_params) as source:
        return source.read()


@_test_case
def test_oss_readwrite_text(benchmark, oss_bucket, uri):
    """Round-trip a non-ASCII text through ``uri`` in text mode and check it comes back unchanged."""
    expected = 'с гранатою в кармане, с чекою в руке'
    round_tripped = benchmark(write_read, uri, expected, 'w', 'r', 'utf-8', oss_bucket)
    assert round_tripped == expected
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def read(fname):
gcs_deps = ['google-cloud-storage']
azure_deps = ['azure-storage-blob', 'azure-common', 'azure-core']
http_deps = ['requests']
oss_deps = ['oss2', 'tenacity']

all_deps = aws_deps + gcs_deps + azure_deps + http_deps
all_deps = aws_deps + gcs_deps + azure_deps + http_deps + oss_deps
tests_require = all_deps + [
'moto[server]==1.3.14', # Older versions of moto appear broken
'pathlib2',
Expand Down Expand Up @@ -79,6 +80,7 @@ def read(fname):
'azure': azure_deps,
'all': all_deps,
'http': http_deps,
'oss': oss_deps,
'webhdfs': http_deps,
},
python_requires=">=3.6,<4.0",
Expand Down
Loading