diff --git a/README.md b/README.md index 843a620..8ea405a 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,8 @@ python3 -m jupyterhub_idle_culler [--timeout=900] [--url=http://localhost:8081/h same time can slow down the Hub, so limit the number of API requests we have outstanding at any given time. (default 10) + --config Service configuration file to load. + (default idle_culler_config.py) --cull-admin-users Whether admin users should be culled (only if --cull-users=true). (default True) --cull-every The interval (in seconds) for checking for diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index cddbde3..9cd729e 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -5,8 +5,10 @@ import asyncio import json +import logging import os import ssl +import sys from datetime import datetime, timezone from functools import partial from textwrap import dedent @@ -17,8 +19,9 @@ from tornado.httpclient import AsyncHTTPClient, HTTPRequest from tornado.httputil import url_concat from tornado.ioloop import IOLoop, PeriodicCallback -from tornado.log import app_log -from tornado.options import define, options, parse_command_line +from tornado.log import LogFormatter +from traitlets import Bool, Int, Unicode, default +from traitlets.config import Application __version__ = "1.4.1.dev" @@ -79,6 +82,7 @@ async def cull_idle( url, api_token, inactive_limit, + logger, cull_users=False, remove_named_servers=False, max_age=0, @@ -107,8 +111,8 @@ async def cull_idle( f"{internal_certs_location}/hub-ca/hub-ca.crt", ) - app_log.debug("ssl_enabled is Enabled: %s", ssl_enabled) - app_log.debug("internal_certs_location is %s", internal_certs_location) + logger.debug("ssl_enabled is Enabled: %s", ssl_enabled) + logger.debug("internal_certs_location is %s", internal_certs_location) defaults["ssl_options"] = ssl_context AsyncHTTPClient.configure(None, defaults=defaults) @@ -153,7 +157,7 @@ async def 
fetch_paginated(req): next_info = resp_model["_pagination"]["next"] if next_info: page_no += 1 - app_log.info(f"Fetching page {page_no} {next_info['url']}") + logger.info(f"Fetching page {page_no} {next_info['url']}") # submit next request req.url = next_info["url"] resp_future = asyncio.ensure_future(fetch(req)) @@ -162,7 +166,7 @@ async def fetch_paginated(req): item_count += 1 yield item - app_log.debug(f"Fetched {item_count} items from {url} in {page_no} pages") + logger.debug(f"Fetched {item_count} items from {url} in {page_no} pages") # Starting with jupyterhub 1.3.0 the users can be filtered in the server # using the `state` filter parameter. "ready" means all users who have any @@ -187,7 +191,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): if server_name: log_name = f"{user['name']}/{server_name}" if server.get("pending"): - app_log.warning( + logger.warning( f"Not culling server {log_name} with pending {server['pending']}" ) return False @@ -200,7 +204,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): # but let's check just to be safe. 
if not server.get("ready", bool(server["url"])): - app_log.warning( + logger.warning( f"Not culling not-ready not-pending server {log_name}: {server}" ) return False @@ -248,7 +252,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): ) ) if should_cull: - app_log.info( + logger.info( f"Culling server {log_name} (inactive for {format_td(inactive)})" ) @@ -257,7 +261,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): # so that we can still be compatible with jupyterhub 0.8 # which doesn't define the 'started' field if age is not None and age.total_seconds() >= max_age: - app_log.info( + logger.info( "Culling server %s (age: %s, inactive for %s)", log_name, format_td(age), @@ -266,7 +270,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): should_cull = True if not should_cull: - app_log.debug( + logger.debug( "Not culling server %s (age: %s, inactive for %s)", log_name, format_td(age), @@ -303,7 +307,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): ) resp = await fetch(req) if resp.code == 202: - app_log.warning(f"Server {log_name} is slow to stop") + logger.warning(f"Server {log_name} is slow to stop") # return False to prevent culling user with pending shutdowns return False return True @@ -347,7 +351,7 @@ async def handle_user(user): # some servers are still running, cannot cull users still_alive = len(results) - sum(results) if still_alive: - app_log.debug( + logger.debug( "Not culling user %s with %i servers still alive", user["name"], still_alive, @@ -378,21 +382,21 @@ async def handle_user(user): ) and (cull_admin_users or not user_is_admin) if should_cull: - app_log.info(f"Culling user {user['name']} " f"(inactive for {inactive})") + logger.info(f"Culling user {user['name']} " f"(inactive for {inactive})") if max_age and not should_cull: # only check created if max_age is specified # so that we can still be compatible with jupyterhub 0.8 # 
which doesn't define the 'started' field if age is not None and age.total_seconds() >= max_age: - app_log.info( + logger.info( f"Culling user {user['name']} " f"(age: {format_td(age)}, inactive for {format_td(inactive)})" ) should_cull = True if not should_cull: - app_log.debug( + logger.debug( f"Not culling user {user['name']} " f"(created: {format_td(age)}, last active: {format_td(inactive)})" ) @@ -422,7 +426,7 @@ async def handle_user(user): async for user in fetch_paginated(req): n_idle += 1 futures.append((user["name"], handle_user(user))) - app_log.debug(f"Got {n_idle} users with inactive servers") + logger.debug(f"Got {n_idle} users with inactive servers") if state_filter: params["state"] = "ready" @@ -438,200 +442,292 @@ async def handle_user(user): futures.append((user["name"], handle_user(user))) if state_filter: - app_log.debug(f"Got {n_users} users with ready servers") + logger.debug(f"Got {n_users} users with ready servers") else: - app_log.debug(f"Got {n_users} users") + logger.debug(f"Got {n_users} users") for name, f in futures: try: result = await f except Exception: - app_log.exception(f"Error processing {name}") + logger.exception(f"Error processing {name}") else: if result: - app_log.debug("Finished culling %s", name) + logger.debug("Finished culling %s", name) -def main(): - define( - "url", - default=os.environ.get("JUPYTERHUB_API_URL"), +class IdleCuller(Application): + + api_page_size = Int( + 0, help=dedent( """ - The JupyterHub API URL. + Number of users to request per page, + when using JupyterHub 2.0's paginated user list API. + Default: use the server-side default configured page size. """ ).strip(), + ).tag( + config=True, ) - define( - "timeout", - type=int, - default=600, + + concurrency = Int( + 10, help=dedent( """ - The idle timeout (in seconds). + Limit the number of concurrent requests made to the Hub. 
+ + Deleting a lot of users at the same time can slow down the Hub, + so limit the number of API requests we have outstanding at any given time. """ ).strip(), + ).tag( + config=True, ) - define( - "cull_every", - type=int, - default=0, + + config_file = Unicode( + "idle_culler_config.py", help=dedent( """ - The interval (in seconds) for checking for idle servers to cull. + Config file to load. """ ).strip(), + ).tag( + config=True, ) - define( - "max_age", - type=int, - default=0, + + cull_admin_users = Bool( + True, help=dedent( """ - The maximum age (in seconds) of servers that should be culled even if they are active. + Whether admin users should be culled (only if --cull-users=true). """ ).strip(), + ).tag( + config=True, ) - define( - "cull_users", - type=bool, - default=False, + + cull_default_servers = Bool( + True, help=dedent( """ - Cull users in addition to servers. - - This is for use in temporary-user cases such as tmpnb. + Whether default servers should be culled (only if --cull-default-servers=true). """ ).strip(), + ).tag( + config=True, ) - define( - "remove_named_servers", - default=False, - type=bool, + + cull_every = Int( + 0, help=dedent( """ - Remove named servers in addition to stopping them. + The interval (in seconds) for checking for idle servers to cull. + """ + ).strip(), + ).tag( + config=True, + ) - This is useful for a BinderHub that uses authentication and named servers. + @default("cull_every") + def _default_cull_every(self): + return self.timeout // 2 + + cull_named_servers = Bool( + True, + help=dedent( + """ + Whether named servers should be culled (only if --cull-named-servers=true). """ ).strip(), + ).tag( + config=True, ) - define( - "concurrency", - type=int, - default=10, + + cull_users = Bool( + False, help=dedent( """ - Limit the number of concurrent requests made to the Hub. + Cull users in addition to servers. 
- Deleting a lot of users at the same time can slow down the Hub, - so limit the number of API requests we have outstanding at any given time. + This is for use in temporary-user cases such as tmpnb. """ ).strip(), + ).tag( + config=True, ) - define( - "ssl_enabled", - type=bool, - default=False, + + generate_config = Bool( + False, help=dedent( """ - Whether the Jupyter API endpoint has TLS enabled. + Generate default config file. """ ).strip(), + ).tag( + config=True, ) - define( - "internal_certs_location", - type=str, - default="internal-ssl", + + internal_certs_location = Unicode( + "internal-ssl", help=dedent( """ The location of generated internal-ssl certificates (only needed with --ssl-enabled=true). """ ).strip(), + ).tag( + config=True, ) - define( - "cull_admin_users", - type=bool, - default=True, + + _log_formatter_cls = LogFormatter + + @default("log_level") + def _log_level_default(self): + return logging.INFO + + @default("log_datefmt") + def _log_datefmt_default(self): + """Default timestamp format for log records (date and time)""" + return "%Y-%m-%d %H:%M:%S" + + @default("log_format") + def _log_format_default(self): + """override default log format to include time""" + return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s" + + max_age = Int( + 0, help=dedent( """ - Whether admin users should be culled (only if --cull-users=true). + The maximum age (in seconds) of servers that should be culled even if they are active. """ ).strip(), + ).tag( + config=True, ) - define( - "api_page_size", - type=int, - default=0, + + remove_named_servers = Bool( + False, help=dedent( """ - Number of users to request per page, - when using JupyterHub 2.0's paginated user list API. - Default: user the server-side default configured page size. + Remove named servers in addition to stopping them. + + This is useful for a BinderHub that uses authentication and named servers. 
""" ).strip(), + ).tag( + config=True, ) - define( - "cull_default_servers", - type=bool, - default=True, + + ssl_enabled = Bool( + False, help=dedent( """ - Whether default servers should be culled (only if --cull-default-servers=true). + Whether the Jupyter API endpoint has TLS enabled. """ ).strip(), + ).tag( + config=True, ) - define( - "cull_named_servers", - type=bool, - default=True, + + timeout = Int( + 600, help=dedent( """ - Whether named servers should be culled (only if --cull-named-servers=true). + The idle timeout (in seconds). """ ).strip(), + ).tag( + config=True, ) - parse_command_line() - if not options.cull_every: - options.cull_every = options.timeout // 2 - api_token = os.environ["JUPYTERHUB_API_TOKEN"] - - try: - AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") - except ImportError as e: - app_log.warning( - f"Could not load pycurl: {e}\n" - "pycurl is recommended if you have a large number of users." + url = Unicode( + os.environ.get("JUPYTERHUB_API_URL"), + allow_none=True, + help=dedent( + """ + The JupyterHub API URL. 
+ """ + ).strip(), + ).tag( + config=True, + ) + + aliases = { + "api-page-size": "IdleCuller.api_page_size", + "concurrency": "IdleCuller.concurrency", + "config": "IdleCuller.config_file", + "cull-admin-users": "IdleCuller.cull_admin_users", + "cull-default-servers": "IdleCuller.cull_default_servers", + "cull-every": "IdleCuller.cull_every", + "cull-named-servers": "IdleCuller.cull_named_servers", + "cull-users": "IdleCuller.cull_users", + "internal-certs-location": "IdleCuller.internal_certs_location", + "max-age": "IdleCuller.max_age", + "remove-named-servers": "IdleCuller.remove_named_servers", + "ssl-enabled": "IdleCuller.ssl_enabled", + "timeout": "IdleCuller.timeout", + "url": "IdleCuller.url", + } + + flags = { + "generate-config": ( + {"IdleCuller": {"generate_config": True}}, + generate_config.help, ) + } - loop = IOLoop.current() - cull = partial( - cull_idle, - url=options.url, - api_token=api_token, - inactive_limit=options.timeout, - cull_users=options.cull_users, - remove_named_servers=options.remove_named_servers, - max_age=options.max_age, - concurrency=options.concurrency, - ssl_enabled=options.ssl_enabled, - internal_certs_location=options.internal_certs_location, - cull_admin_users=options.cull_admin_users, - api_page_size=options.api_page_size, - cull_default_servers=options.cull_default_servers, - cull_named_servers=options.cull_named_servers, - ) - # schedule first cull immediately - # because PeriodicCallback doesn't start until the end of the first interval - loop.add_callback(cull) - # schedule periodic cull - pc = PeriodicCallback(cull, 1e3 * options.cull_every) - pc.start() - try: - loop.start() - except KeyboardInterrupt: - pass + def start(self): + + if self.generate_config: + print(self.generate_config_file()) + sys.exit(0) + + if self.config_file: + self.load_config_file(self.config_file) + + api_token = os.environ["JUPYTERHUB_API_TOKEN"] + + try: + AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") + except 
ImportError as e: + self.log.warning( + f"Could not load pycurl: {e}\n" + "pycurl is recommended if you have a large number of users." + ) + + loop = IOLoop.current() + cull = partial( + cull_idle, + url=self.url, + api_token=api_token, + inactive_limit=self.timeout, + logger=self.log, + cull_users=self.cull_users, + remove_named_servers=self.remove_named_servers, + max_age=self.max_age, + concurrency=self.concurrency, + ssl_enabled=self.ssl_enabled, + internal_certs_location=self.internal_certs_location, + cull_admin_users=self.cull_admin_users, + api_page_size=self.api_page_size, + cull_default_servers=self.cull_default_servers, + cull_named_servers=self.cull_named_servers, + ) + # schedule first cull immediately + # because PeriodicCallback doesn't start until the end of the first interval + loop.add_callback(cull) + # schedule periodic cull + pc = PeriodicCallback(cull, 1e3 * self.cull_every) + pc.start() + try: + loop.start() + except KeyboardInterrupt: + pass + + +def main(): + IdleCuller.launch_instance() if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 6bc9d33..4cb78f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "tornado", "packaging", "python-dateutil", + "traitlets", ] dynamic = ["version"] diff --git a/tests/test_idle_culler.py b/tests/test_idle_culler.py index 88e9901..e6986df 100644 --- a/tests/test_idle_culler.py +++ b/tests/test_idle_culler.py @@ -3,6 +3,8 @@ from subprocess import check_output from unittest import mock +from tornado.log import app_log + from jupyterhub_idle_culler import utcnow @@ -32,7 +34,7 @@ async def test_cull_idle(cull_idle, start_users, admin_request): assert await count_active_users(admin_request) == 0 await start_users(3) assert await count_active_users(admin_request) == 3 - await cull_idle(inactive_limit=300) + await cull_idle(inactive_limit=300, logger=app_log) # no change assert await count_active_users(admin_request) == 3 @@ -40,7 +42,7 @@ async def 
test_cull_idle(cull_idle, start_users, admin_request): with mock.patch( "jupyterhub_idle_culler.utcnow", lambda: utcnow() + timedelta(seconds=600) ): - await cull_idle(inactive_limit=300) + await cull_idle(inactive_limit=300, logger=app_log) assert await count_active_users(admin_request) == 0