Skip to content

Commit 85f46ba

Browse files
authored
Merge pull request #22 from TG1999/http_ftp_urls
Add support for downloading http and ftp urls
2 parents 5e4d51d + 1a7ee55 commit 85f46ba

File tree

6 files changed

+205
-101
lines changed

6 files changed

+205
-101
lines changed

README.md

Lines changed: 0 additions & 15 deletions
This file was deleted.

README.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
*********
2+
Fetchcode
3+
*********
4+
It is a library to reliably fetch code via HTTP, FTP and version control systems.
5+
6+
Installation
7+
############
8+
Clone the repo using
9+
``git clone https://github.com/nexB/fetchcode``
10+
11+
Then install all the requirements using
12+
``pip3 install -r requirements.txt``
13+
14+
Running test suite
15+
##################
16+
17+
To run test suite
18+
``python3 -m pytest``
19+
20+
Usage of API to fetch HTTP/S and FTP URLs
21+
#########################################
22+
```
23+
from fetchcode import fetch
24+
url = 'An HTTP or FTP URL'
25+
location = 'Location of file'
26+
# This returns a response object which has attributes
27+
# 'content_type' content type of the file
28+
# 'location' the absolute location of the file that was fetched
29+
# 'scheme' scheme of the URL
30+
# 'size' size of the retrieved content in bytes
31+
# 'url' fetched URL
32+
resp = fetch(url = url)
33+
```

fetchcode/__init__.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/nexB/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from ftplib import FTP
18+
from mimetypes import MimeTypes
19+
import os
20+
import tempfile
21+
from urllib.parse import urlparse
22+
23+
import requests
24+
25+
26+
class Response:
    """
    Describe the outcome of fetching a URL and saving its content to a file.
    """

    def __init__(self, location, content_type, size, url):
        """
        Represent the response from fetching a URL with:
        - `location`: the absolute location of the file that was fetched
        - `content_type`: content type of the file
        - `size`: size of the retrieved content in bytes
        - `url`: fetched URL

        A `scheme` attribute holding the scheme of `url` is derived from `url`;
        it is None when no URL is given.
        """
        self.url = url
        self.size = size
        self.content_type = content_type
        self.location = location
        # Derived here so that callers do not have to parse the URL again.
        self.scheme = urlparse(url).scheme if url else None

    def __repr__(self):
        return '{}(location={!r}, content_type={!r}, size={!r}, url={!r})'.format(
            type(self).__name__,
            self.location,
            self.content_type,
            self.size,
            self.url,
        )
39+
40+
41+
def fetch_http(url, location, timeout=60):
    """
    Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string
    saving the content in a file at `location`.

    `timeout` is the number of seconds to wait for the server to connect or to send data
    before giving up. Pass None to wait indefinitely.

    The reported size comes from the `Content-Length` header when the server sends a usable
    one; otherwise it is the number of bytes actually retrieved.
    """
    # Without a timeout, requests waits forever on a server that stops responding.
    r = requests.get(url, timeout=timeout)
    content = r.content
    with open(location, 'wb') as f:
        f.write(content)

    content_type = r.headers.get('content-type')

    try:
        size = int(r.headers.get('content-length'))
    except (TypeError, ValueError):
        # Header missing (e.g. chunked transfer) or malformed: report what was retrieved.
        size = len(content)

    return Response(location=location, content_type=content_type, size=size, url=url)
57+
58+
59+
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based `url` URL string
    saving the content in a file at `location`.

    The login is anonymous. The content type is guessed from the file name and is None
    when it cannot be guessed.
    """
    url_parts = urlparse(url)

    # NOTE(review): the netloc is passed to FTP() as the host as-is, so credentials or a
    # port embedded in the URL are not handled here.
    netloc = url_parts.netloc
    path = url_parts.path
    directory, filename = os.path.split(path)

    # guess_type() always returns a (type, encoding) tuple; the type is None when unknown.
    content_type, _encoding = MimeTypes().guess_type(filename)

    ftp = FTP(netloc)
    try:
        ftp.login()
        size = ftp.size(path)
        ftp.cwd(directory)
        with open(location, 'wb') as f:
            ftp.retrbinary('RETR {}'.format(filename), f.write)
    finally:
        # Release the connection even when login, sizing or the transfer fails.
        ftp.close()

    return Response(location=location, content_type=content_type, size=size, url=url)
89+
90+
91+
def fetch(url):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and
    store the content in a temporary file.

    Supported URL schemes are `http`, `https` and `ftp`. Raise an Exception for any other
    scheme. The temporary file is not deleted after a successful fetch: its path is the
    `location` of the returned `Response`.
    """
    scheme = urlparse(url).scheme

    # Validate the scheme before touching the filesystem so that an unsupported URL
    # does not leave an orphaned temporary file behind.
    if scheme in ('http', 'https'):
        fetcher = fetch_http
    elif scheme == 'ftp':
        fetcher = fetch_ftp
    else:
        raise Exception('Not a supported/known scheme.')

    # Only the name is needed: close the handle right away so that it is not leaked
    # and so that the fetcher can reopen the file for writing.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        location = temp.name

    try:
        return fetcher(url, location)
    except Exception:
        # The caller never learns the location on failure, so remove the file here.
        try:
            os.remove(location)
        except OSError:
            pass
        raise

fetchcode/api.py

Lines changed: 0 additions & 54 deletions
This file was deleted.

tests/test_api.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

tests/test_fetch.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/nexB/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from unittest import mock
18+
19+
import pytest
20+
21+
from fetchcode import fetch
22+
23+
24+
@mock.patch('fetchcode.requests.get')
def test_fetch_http_with_tempfile(mock_get):
    """
    Check that fetching an HTTPS URL reports the size and content type taken from the
    response headers, with the HTTP request and the file write both mocked.
    """
    png_url = 'https://raw.githubusercontent.com/TG1999/converge/master/assets/Group%2022.png'
    fake_headers = {
        'content-type': 'image/png',
        'content-length': '1000999',
    }
    mock_get.return_value.headers = fake_headers

    # Patch open() as seen by fetchcode so that nothing is written to disk.
    with mock.patch('fetchcode.open', mock.mock_open()):
        fetched = fetch(url=png_url)

    assert fetched is not None
    assert fetched.size == 1000999
    assert fetched.url == png_url
    assert fetched.content_type == 'image/png'
38+
39+
40+
@mock.patch('fetchcode.FTP')
def test_fetch_with_wrong_url(mock_ftp_constructor):
    """
    Check that a connection failure for an FTP URL with an unreachable host propagates
    out of fetch().
    """
    # Simulate the failure to resolve the host without touching the network. Without
    # this, the mocked FTP never raises and the test can only pass vacuously.
    mock_ftp_constructor.side_effect = OSError('Name or service not known')

    url = 'ftp://speedtest/1KB.zip'
    with pytest.raises(OSError) as e_info:
        fetch(url=url)

    # Inspect the exception after the block: statements placed after the raising call
    # inside the block are never executed.
    assert 'Name or service not known' in str(e_info.value)
    mock_ftp_constructor.assert_called_with('speedtest')
46+
47+
48+
@mock.patch('fetchcode.FTP', autospec=True)
def test_fetch_ftp_with_tempfile(mock_ftp_constructor):
    """
    Check that fetching an FTP URL connects to the host, logs in, changes to the file
    directory and retrieves the file, with the FTP client and the file write both mocked.
    """
    ftp_client = mock_ftp_constructor.return_value
    ftp_client.size.return_value = 1024

    # Patch open() as seen by fetchcode so that nothing is written to disk.
    with mock.patch('fetchcode.open', mock.mock_open()):
        fetched = fetch('ftp://speedtest.tele2.net/1KB.zip')

    assert fetched.size == 1024
    mock_ftp_constructor.assert_called_with('speedtest.tele2.net')
    assert ftp_client.login.called
    ftp_client.cwd.assert_called_with('/')
    assert ftp_client.retrbinary.called
59+
60+
61+
def test_fetch_with_scheme_not_present():
    """
    Check that fetch() rejects a URL whose scheme has no fetcher.
    """
    url = 'abc://speedtest/1KB.zip'
    with pytest.raises(Exception) as e_info:
        fetch(url=url)

    # The message must be checked after the block and against the raised exception:
    # an assert placed after the raising call inside the block is never executed.
    assert 'Not a supported/known scheme.' == str(e_info.value)

0 commit comments

Comments
 (0)