Skip to content

Commit 96714b0

Browse files
authored
adding support for serial and fixing bug that driver might not be defined (#82)
* adding support for serial and fixing bug that driver might not be defined Signed-off-by: vsoch <vsochat@stanford.edu>
1 parent dad0bdc commit 96714b0

File tree

7 files changed

+32
-6
lines changed

7 files changed

+32
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and **Merged pull requests**. Critical items to know are:
1212
Referenced versions in headers are tagged on Github, in parentheses are for pypi.
1313

1414
## [vxx](https://github.com/urlstechie/urlschecker-python/tree/master) (master)
15+
- serial option for debugging (0.0.32)
1516
- adding support for web driver for harder URLs (0.0.31)
1617
- use ANSI escape sequences for colors, fake-useragent for agents (0.0.30)
1718
- adding type hints to code, more tests and logging bug fix (0.0.29)

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,12 @@ urlchecker check --file-types ".*,*.html" .
247247
248248
**Note that while some patterns will work without quotes, it's recommended for most**
249249
to use them because if the shell expands any part of the pattern, it will not work as
250-
expected. By default, the urlchecker checks python and markdown.
250+
expected. By default, the urlchecker checks python and markdown. If a multiprocessing worker has an error,
251+
you can also add `--serial` to run in serial and test. The run will be slower, but it's useful for debugging.
252+
253+
```bash
254+
$ urlchecker check . --files "content/docs/hacking/contributing/documentation/index.md" --serial
255+
```
251256
252257
### Check GitHub Repository
253258

urlchecker/client/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ def get_parser():
7070
action="store_true",
7171
)
7272

73+
check.add_argument(
74+
"--serial",
75+
help="run checks in serial (no multiprocess)",
76+
default=False,
77+
action="store_true",
78+
)
7379
check.add_argument(
7480
"--force-pass",
7581
help="force successful pass (return code 0) regardless of result",

urlchecker/client/check.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def main(args, extra):
6565
print(" subfolder: %s" % args.subfolder)
6666
print(" branch: %s" % args.branch)
6767
print(" cleanup: %s" % args.cleanup)
68+
print(" serial: %s" % args.serial)
6869
print(" file types: %s" % file_types)
6970
print(" files: %s" % files)
7071
print(" print all: %s" % (not args.no_print))
@@ -84,6 +85,7 @@ def main(args, extra):
8485
include_patterns=files,
8586
exclude_files=exclude_files,
8687
print_all=not args.no_print,
88+
serial=args.serial,
8789
)
8890
check_results = checker.run(
8991
exclude_urls=exclude_urls,

urlchecker/core/check.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121

2222
class UrlChecker:
23-
"""The UrlChecker can be instantiated by a client, and then used
23+
"""
24+
The UrlChecker can be instantiated by a client, and then used
2425
to parse files, extract urls, and save results.
2526
"""
2627

@@ -31,6 +32,7 @@ def __init__(
3132
exclude_files: List[str] = None,
3233
print_all: bool = True,
3334
include_patterns: List[str] = None,
35+
serial: bool = False,
3436
):
3537
"""
3638
initiate a url checker. At init we take in preferences for
@@ -43,6 +45,7 @@ def __init__(
4345
- print_all (bool) : control var for whether to print all checked file names or only the ones with urls.
4446
- exclude_files (list) : list of excluded files and patterns for files.
4547
- include_patterns (list) : list of files and patterns to check.
48+
- serial (bool) : do checks in serial (no multiprocessing)
4649
"""
4750
# Initiate results object, and checks lookup (holds UrlCheck) for each file
4851
self.results = {
@@ -61,6 +64,7 @@ def __init__(
6164
self.path = path
6265
self.file_types = file_types or [".py", ".md"]
6366
self.file_paths = []
67+
self.serial = serial
6468

6569
# get all file paths if a path is defined
6670
if path:
@@ -195,10 +199,11 @@ def run(
195199
random.shuffle(ports)
196200

197201
# loop through files
202+
results = {}
198203
for file_name in file_paths:
199204

200205
# Export parameters and functions, use the same check task for all
201-
tasks[file_name] = {
206+
kwargs = {
202207
"file_name": file_name,
203208
"exclude_patterns": exclude_patterns,
204209
"exclude_urls": exclude_urls,
@@ -207,9 +212,16 @@ def run(
207212
"timeout": timeout,
208213
"port": ports.pop(0),
209214
}
215+
216+
if self.serial:
217+
results[file_name] = check_task(**kwargs)
218+
continue
219+
220+
tasks[file_name] = kwargs
210221
funcs[file_name] = check_task
211222

212-
results = workers.run(funcs, tasks)
223+
if not self.serial:
224+
results = workers.run(funcs, tasks) # type: ignore
213225
if not results:
214226
print("\U0001F914 There were no URLs to check.")
215227
sys.exit(0)

urlchecker/core/urlproc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def check_urls(
242242

243243
# Web driver doesn't have same issues with ssl
244244
except Exception as e:
245-
if driver.check(url):
245+
if driver and driver.check(url):
246246
response = requests.Response()
247247
response.status_code = 200
248248
else:

urlchecker/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
88
"""
99

10-
__version__ = "0.0.31"
10+
__version__ = "0.0.32"
1111
AUTHOR = "Ayoub Malek, Vanessa Sochat"
1212
AUTHOR_EMAIL = "superkogito@gmail.com, vsochat@stanford.edu"
1313
NAME = "urlchecker"

0 commit comments

Comments
 (0)