Skip to content

Commit 6b754eb

Browse files
authored
Merge pull request #28 from urlstechie/add/files-flag
Adding files flag and fixing bug with return code
2 parents cec4424 + a49b28d commit 6b754eb

File tree

11 files changed

+96
-45
lines changed

11 files changed

+96
-45
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and **Merged pull requests**. Critical items to know are:
1212
Referenced versions in headers are tagged on Github, in parentheses are for pypi.
1313

1414
## [vxx](https://github.com/urlstechie/urlschecker-python/tree/master) (master)
15+
- bug with incorrect return code on fail, add files flag (0.0.16)
1516
- reverting back to working client (0.0.15)
1617
- removing unused file variable (0.0.13)
1718
- adding support for csv export (0.0.12)

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@ Your most likely use case will be to check a local directory with static files (
4747
for files. In this case, you can use urlchecker check:
4848

4949
```bash
50-
$ urlchecker check --help
51-
5250
$ urlchecker check --help
5351
usage: urlchecker check [-h] [-b BRANCH] [--subfolder SUBFOLDER] [--cleanup]
5452
[--force-pass] [--no-print] [--file-types FILE_TYPES]
53+
[--files FILES]
5554
[--white-listed-urls WHITE_LISTED_URLS]
5655
[--white-listed-patterns WHITE_LISTED_PATTERNS]
5756
[--white-listed-files WHITE_LISTED_FILES]
58-
[--retry-count RETRY_COUNT] [--timeout TIMEOUT]
57+
[--save SAVE] [--retry-count RETRY_COUNT]
58+
[--timeout TIMEOUT]
5959
path
6060

6161
positional arguments:
@@ -78,6 +78,8 @@ optional arguments:
7878
--file-types FILE_TYPES
7979
comma separated list of file extensions to check
8080
(defaults to .md,.py)
81+
--files FILES comma separated list of exact files or patterns to
82+
check.
8183
--white-listed-urls WHITE_LISTED_URLS
8284
comma separated list of white listed urls (no spaces)
8385
--white-listed-patterns WHITE_LISTED_PATTERNS
@@ -86,6 +88,7 @@ optional arguments:
8688
--white-listed-files WHITE_LISTED_FILES
8789
comma separated list of white listed files and
8890
patterns for files (no spaces)
91+
--save SAVE Path to a csv file to save results to.
8992
--retry-count RETRY_COUNT
9093
retry count upon failure (defaults to 2, one retry).
9194
--timeout TIMEOUT timeout (seconds) to provide to the requests library

urlchecker/client/__init__.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import urlchecker
1717
import logging
1818

19+
1920
def get_parser():
2021
parser = argparse.ArgumentParser(description="urlchecker python")
2122

@@ -39,7 +40,7 @@ def get_parser():
3940
subparsers = parser.add_subparsers(
4041
help="urlchecker python actions",
4142
title="actions",
42-
description='actions for urlchecker',
43+
description="actions for urlchecker",
4344
dest="command",
4445
)
4546

@@ -55,8 +56,7 @@ def get_parser():
5556

5657
# supports a clone URL or a path
5758
check.add_argument(
58-
"path",
59-
help="the local path or GitHub repository to clone and check",
59+
"path", help="the local path or GitHub repository to clone and check",
6060
)
6161

6262
check.add_argument(
@@ -99,7 +99,14 @@ def get_parser():
9999
default=".md,.py",
100100
)
101101

102-
# White listing
102+
check.add_argument(
103+
"--files",
104+
dest="files",
105+
help="comma separated list of exact files or patterns to check.",
106+
default=None,
107+
)
108+
109+
# White listing
103110

104111
check.add_argument(
105112
"--white-listed-urls",
@@ -119,15 +126,13 @@ def get_parser():
119126
default="",
120127
)
121128

122-
# Saving
129+
# Saving
123130

124131
check.add_argument(
125-
"--save",
126-
help="Path toa csv file to save results to.",
127-
default=None,
132+
"--save", help="Path to a csv file to save results to.", default=None,
128133
)
129134

130-
# Timeouts
135+
# Timeouts
131136

132137
check.add_argument(
133138
"--retry-count",
@@ -180,7 +185,7 @@ def help(return_code=0):
180185
else:
181186
print("Unsupported command %s" % args.command)
182187
sys.exit(0)
183-
188+
184189
# Pass on to the correct parser
185190
return_code = 0
186191
try:

urlchecker/client/check.py

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from urlchecker.core.check import run_urlchecker
1414
from urlchecker.logger import print_success, print_failure
1515

16-
logger = logging.getLogger('urlchecker')
16+
logger = logging.getLogger("urlchecker")
1717

1818

1919
def main(args, extra):
@@ -52,6 +52,7 @@ def main(args, extra):
5252
white_listed_urls = remove_empty(args.white_listed_urls.split(","))
5353
white_listed_patterns = remove_empty(args.white_listed_patterns.split(","))
5454
white_listed_files = remove_empty(args.white_listed_files.split(","))
55+
files = remove_empty(args.files.split(","))
5556

5657
# Alert user about settings
5758
print(" original path: %s" % args.path)
@@ -60,6 +61,7 @@ def main(args, extra):
6061
print(" branch: %s" % args.branch)
6162
print(" cleanup: %s" % args.cleanup)
6263
print(" file types: %s" % file_types)
64+
print(" files: %s" % files)
6365
print(" print all: %s" % (not args.no_print))
6466
print(" url whitetlist: %s" % white_listed_urls)
6567
print(" url patterns: %s" % white_listed_patterns)
@@ -70,14 +72,17 @@ def main(args, extra):
7072
print(" timeout: %s" % args.timeout)
7173

7274
# Run checks, get lookup of results and fails
73-
check_results = run_urlchecker(path=path,
74-
file_types=file_types,
75-
white_listed_files=white_listed_files,
76-
white_listed_urls=white_listed_urls,
77-
white_listed_patterns=white_listed_patterns,
78-
print_all=not args.no_print,
79-
retry_count=args.retry_count,
80-
timeout=args.timeout)
75+
check_results = run_urlchecker(
76+
path=path,
77+
file_types=file_types,
78+
include_patterns=files,
79+
white_listed_files=white_listed_files,
80+
white_listed_urls=white_listed_urls,
81+
white_listed_patterns=white_listed_patterns,
82+
print_all=not args.no_print,
83+
retry_count=args.retry_count,
84+
timeout=args.timeout,
85+
)
8186

8287
# save results to file, if save indicated
8388
if args.save:
@@ -89,17 +94,23 @@ def main(args, extra):
8994
delete_repo(path)
9095

9196
# Case 1: We didn't find any urls to check
92-
if not check_results['failed'] and not check_results['passed']:
97+
if not check_results["failed"] and not check_results["passed"]:
9398
print("\n\nDone. No urls were collected.")
9499
sys.exit(0)
95100

96-
# Case 2: We had errors, but force pass is True
97-
elif args.force_pass and check_results['failed']:
101+
# Case 2: We had errors, print them for the user
102+
if check_results["failed"]:
98103
print("\n\nDone. The following urls did not pass:")
99-
for failed_url in check_results['failed']:
104+
for failed_url in check_results["failed"]:
100105
print_failure(failed_url)
106+
107+
# If we have failures and it's not a force pass, exit with 1
108+
if not args.force_pass and check_results["failed"]:
101109
sys.exit(1)
102110

111+
# Finally, alert user if we are passing conditionally
112+
if check_results["failed"]:
113+
print("\n\nConditional pass force pass True.")
103114
else:
104115
print("\n\nDone. All URLS passed.")
105-
sys.exit(0)
116+
sys.exit(0)

urlchecker/core/check.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,17 @@
1212
from urlchecker.core import fileproc, urlproc
1313
from urlchecker.core.whitelist import white_listed
1414

15+
1516
def run_urlchecker(
1617
path,
1718
file_types,
1819
white_listed_files,
1920
white_listed_urls,
2021
white_listed_patterns,
2122
print_all,
23+
include_patterns=None,
2224
retry_count=2,
23-
timeout=5
25+
timeout=5,
2426
):
2527
"""
2628
Run the url checker given a path, a whitelist for each of url and file
@@ -34,6 +36,7 @@ def run_urlchecker(
3436
- white_listed_urls (list) : list of white-listed urls.
3537
- white_listed_patterns (list) : list of white-listed patterns for urls.
3638
- white_listed_files (list) : list of white-listed files and patterns for files.
39+
- include_patterns (list) : list of files and patterns to check.
3740
- retry_count (int) : number of retries on failed first check. Default=2.
3841
- timeout (int) : timeout to use when waiting on check feedback. Default=5.
3942
@@ -44,6 +47,7 @@ def run_urlchecker(
4447

4548
# get all file paths
4649
file_paths = fileproc.get_file_paths(
50+
include_patterns=include_patterns,
4751
base_path=path,
4852
file_types=file_types,
4953
white_listed_files=white_listed_files,
@@ -83,7 +87,7 @@ def check_files(
8387
(list) check-results as a list of two lists (successful checks, failed checks).
8488
"""
8589
# init results list (first is success, second is issue)
86-
check_results = {"passed":[], "failed": []}
90+
check_results = {"passed": [], "failed": []}
8791

8892
# Allow for user to skip specifying white listed options
8993
white_listed_urls = white_listed_urls or []

urlchecker/core/fileproc.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,40 +34,61 @@ def check_file_type(file_path, file_types):
3434
return False
3535

3636

37-
def include_file(file_path, white_list_patterns):
37+
def include_file(file_path, white_list_patterns=None, include_patterns=None):
3838
"""
3939
Check a file path for inclusion based on an OR regular expression.
4040
The user is currently not notified if a file is marked for removal.
4141
4242
Args:
4343
- file_path (str) : a file path to check if should be included.
44-
- white_list_patterns (list) : list of patterns to whitelist (include).
44+
- white_list_patterns (list) : list of patterns to whitelist (not test).
45+
- include_patterns (list) : list of patterns to include.
4546
4647
Returns:
47-
(bool) boolean indicating if the URL should be white listed (included).
48+
(bool) boolean indicating if the file should be included (checked), i.e. it is not white listed.
4849
"""
50+
include_patterns = include_patterns or []
51+
white_list_patterns = white_list_patterns or []
52+
4953
# No white listed patterns, all files are included
50-
if not white_list_patterns:
54+
if not white_list_patterns and not include_patterns:
5155
return True
5256

53-
# Return False (don't include) if the patterns match
54-
regexp = "(%s)" % "|".join(white_list_patterns)
55-
return not re.search(regexp, file_path)
57+
# Create a regular expression for each
58+
whitelist_regexp = "(%s)" % "|".join(white_list_patterns)
59+
include_regexp = "(%s)" % "|".join(include_patterns)
60+
61+
# Return False (don't include) if whitelisted
62+
if not include_patterns:
63+
return not re.search(whitelist_regexp, file_path)
64+
65+
# We have an include_patterns only
66+
elif not white_list_patterns:
67+
return re.search(include_regexp, file_path)
68+
69+
# If both defined, whitelisting takes preference
70+
return re.search(include_regexp, file_path) and not re.search(
71+
whitelist_regexp, file_path
72+
)
5673

5774

58-
def get_file_paths(base_path, file_types, white_listed_files=None):
75+
def get_file_paths(
76+
base_path, file_types, white_listed_files=None, include_patterns=None
77+
):
5978
"""
6079
Get path to all files under a given directory and its subfolders.
6180
6281
Args:
6382
- base_path (str) : base path.
6483
- file_types (list) : list of file extensions to accept.
84+
- include_patterns (list) : list of files and patterns to include.
6585
- white_listed_files (list) : list of files or patterns to white list
6686
6787
Returns:
6888
(list) list of file paths.
6989
"""
7090
white_listed_files = white_listed_files or []
91+
include_patterns = include_patterns or []
7192

7293
# init paths
7394
file_paths = []
@@ -79,7 +100,9 @@ def get_file_paths(base_path, file_types, white_listed_files=None):
79100
for file in files
80101
if os.path.isfile(os.path.join(root, file))
81102
and check_file_type(file, file_types)
82-
and include_file(os.path.join(root, file), white_listed_files)
103+
and include_file(
104+
os.path.join(root, file), white_listed_files, include_patterns
105+
)
83106
]
84107
return file_paths
85108

@@ -145,7 +168,7 @@ def save_results(check_results, file_path, sep=",", header=None):
145168
dirname = os.path.dirname(file_path)
146169

147170
if not os.path.exists(dirname):
148-
sys.exit("%s does not exist, cannot save %s there." %(dirname, file_path))
171+
sys.exit("%s does not exist, cannot save %s there." % (dirname, file_path))
149172

150173
# Ensure the header is provided and correct (length 2)
151174
if not header:
@@ -157,10 +180,10 @@ def save_results(check_results, file_path, sep=",", header=None):
157180
print("Saving results to %s" % file_path)
158181

159182
# Write to file after header row
160-
with open(file_path, mode='w') as fd:
183+
with open(file_path, mode="w") as fd:
161184
writer = csv.writer(fd, delimiter=sep, quotechar='"', quoting=csv.QUOTE_MINIMAL)
162185
writer.writerow(header)
163186
for result, items in check_results.items():
164-
[writer.writerow([item, result]) for item in items];
187+
[writer.writerow([item, result]) for item in items]
165188

166189
return file_path

urlchecker/core/urlproc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from urlchecker.core import urlmarker
1515
from urlchecker.logger import print_success, print_failure
1616

17+
1718
def record_response(url, response, check_results):
1819
"""
1920
Record response status of an input url. This function is run after success,

urlchecker/core/whitelist.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
88
"""
99

10+
1011
def white_listed(url, white_listed_urls, white_listed_patterns):
1112
"""
1213
Check if link is in the white listed URLs or patterns to ignore.

urlchecker/logger.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import logging
1111

12+
1213
def print_failure(message):
1314
"""
1415
Given a message string, print as a failure in red.
@@ -29,7 +30,7 @@ def print_success(message):
2930
print("\x1b[32m" + message + "\x1b[0m")
3031

3132

32-
def get_logger(name='urlchecker', level=logging.INFO):
33+
def get_logger(name="urlchecker", level=logging.INFO):
3334
"""
3435
Get a default logger for the urlchecker library, meaning
3536
that we use name "urlchecker" and use the default logging
@@ -50,7 +51,9 @@ def get_logger(name='urlchecker', level=logging.INFO):
5051
ch.setLevel(logging.ERROR)
5152

5253
# formatting
53-
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
54+
formatter = logging.Formatter(
55+
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
56+
)
5457
ch.setFormatter(formatter)
5558
logger.addHandler(ch)
5659
return logger

urlchecker/main/github.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def delete_repo(base_path):
5555
return result.returncode
5656

5757

58-
5958
def get_branch(default="master"):
6059
"""
6160
Derive the selected branch. We first look to the environment variable

0 commit comments

Comments
 (0)