Skip to content

Commit 5584e81

Browse files
author
steinbach
committed
prevent providing data when retriever is stuck
1 parent 3b5c598 commit 5584e81

File tree

4 files changed

+121
-140
lines changed

4 files changed

+121
-140
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
All notable changes to this project will be documented in this file.
33

44
## [Unreleased] (Available through Edge Tag)
5+
- No unreleased changes
6+
7+
## [0.4.3] - 2021-06-29
8+
### Fixed
9+
- Prevent sending outdated data when the retrieval process is stuck
10+
511
## [0.4.2] - 2021-05-07
612
### Changed
713
- Fixed issues collected in static analysis
@@ -32,7 +38,8 @@ All notable changes to this project will be documented in this file.
3238
## [0.1.0] - 2021-04-22
3339
Initial release to enable the "latest" tag on Docker Hub
3440

35-
[unreleased]: https://github.com/tillsteinbach/prosafe_exporter_python/compare/v0.4.2...HEAD
41+
[unreleased]: https://github.com/tillsteinbach/prosafe_exporter_python/compare/v0.4.3...HEAD
42+
[0.4.3]: https://github.com/tillsteinbach/prosafe_exporter_python/releases/tag/v0.4.3
3643
[0.4.2]: https://github.com/tillsteinbach/prosafe_exporter_python/releases/tag/v0.4.2
3744
[0.4.1]: https://github.com/tillsteinbach/prosafe_exporter_python/releases/tag/v0.4.1
3845
[0.4.0]: https://github.com/tillsteinbach/prosafe_exporter_python/releases/tag/v0.4.0

prosafe_exporter/prosafe_exporter/prosafe_exporter.py

Lines changed: 66 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import json
77
import os
88
import threading
9+
from datetime import datetime, timedelta
910
import logging
1011
import logging.config
1112
from multiprocessing import Lock
@@ -18,21 +19,27 @@
1819

1920
from ._version import __version__
2021

22+
LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
23+
DEFAULT_LOG_LEVEL = "ERROR"
24+
25+
LOG = logging.getLogger("ProSafeExporter")
26+
2127
mutex = Lock()
2228
speedmap = {'Nicht verbunden': '0', 'No Speed': '0', '10M': '10', '100M': '100', '1000M': '1000'}
2329

2430

2531
class ProSafeExporter:
26-
def __init__(self, retrievers=None, logger=logging.getLogger()):
27-
self.logger = logger
32+
def __init__(self, retrievers=None, retrieveInterval=20.0):
2833
self.retrievers = retrievers
34+
self.retrieveInterval = retrieveInterval
35+
self.lastRetrieve = None
2936

3037
self.app = flask.Flask('ProSafeExporter')
3138
self.app.add_url_rule('/<path>', '/<path:path>',
3239
self.__probe, methods=['POST', 'GET'])
3340
self.app.add_url_rule('/', '/', self.__probe, methods=['POST', 'GET'])
3441

35-
def run(self, host="0.0.0.0", port=9493, retrieveInterval=20.0, debug=False, endless=True): # nosec
42+
def run(self, host="0.0.0.0", port=9493, debug=False, endless=True): # nosec
3643
if not debug: # pragma: no cover
3744
os.environ['WERKZEUG_RUN_MAIN'] = 'true'
3845
log = logging.getLogger('werkzeug')
@@ -42,37 +49,41 @@ def run(self, host="0.0.0.0", port=9493, retrieveInterval=20.0, debug=False, end
4249

4350
webthread = threading.Thread(target=server.serve_forever)
4451
webthread.start()
45-
self.logger.info('ProSafeExporter is listening on %s:%s for request on /metrics endpoint'
46-
' (but you can also use any other path)', host, port)
52+
LOG.info('ProSafeExporter is listening on %s:%s for request on /metrics endpoint'
53+
' (but you can also use any other path)', host, port)
4754

4855
try:
4956
self.__retrieve()
5057
while endless: # pragma: no cover
51-
time.sleep(retrieveInterval)
58+
time.sleep(self.retrieveInterval)
5259
self.__retrieve()
5360
except KeyboardInterrupt: # pragma: no cover
5461
pass
5562
server.shutdown()
5663
webthread.join()
57-
self.logger.info('ProSafeExporter was stopped')
64+
LOG.info('ProSafeExporter was stopped')
5865

5966
def __probe(self, path=None):
60-
result = "# Exporter output\n\n"
61-
for retriever in self.retrievers:
62-
result += retriever.result + '\n\n'
63-
self.logger.info('Request on endpoint /%s \n%s', path, result)
64-
return flask.Response(result, status=200, headers={})
67+
if self.lastRetrieve is not None \
68+
and self.lastRetrieve > datetime.now() - timedelta(seconds=(self.retrieveInterval * 5)):
69+
result = "# Exporter output\n\n"
70+
for retriever in self.retrievers:
71+
result += retriever.result + '\n\n'
72+
LOG.debug('Request on endpoint /%s \n%s', path, result)
73+
return flask.Response(result, status=200, headers={})
74+
return flask.Response('', status=503, headers={'Retry-After': self.retrieveInterval})
6575

6676
def __retrieve(self):
67-
self.logger.info('Retrieving data from all devies')
77+
LOG.info('Retrieving data from all devies')
6878
for retriever in self.retrievers:
6979
try:
7080
retriever.retrieve()
7181
except (ConnectionRefusedError, requests.exceptions.ConnectionError):
72-
self.logger.error(
82+
LOG.error(
7383
'Failed to refrieve for host %s', retriever.hostname)
84+
self.lastRetrieve = datetime.now()
7485
retriever.writeResult()
75-
self.logger.info('Retrieving done')
86+
LOG.info('Retrieving done')
7687

7788

7889
class ProSafeRetrieve:
@@ -83,10 +94,8 @@ def __init__(self,
8394
hostname,
8495
password,
8596
cookiefile=None,
86-
logger=logging.getLogger(),
8797
retries=10,
8898
requestTimeout=10.0):
89-
self.logger = logger
9099
self.retries = retries
91100
self.requestTimeout = requestTimeout
92101
self.hostname = hostname
@@ -108,41 +117,41 @@ def __init__(self,
108117
self.__session.cookies.update(cookies)
109118
self.loggedIn = True
110119
except json.JSONDecodeError as err:
111-
self.logger.info('Created retriever for host %s'
112-
' but could not use cookiefile %s (%s)', self.hostname, cookiefile, err.msg)
120+
LOG.info('Created retriever for host %s'
121+
' but could not use cookiefile %s (%s)', self.hostname, cookiefile, err.msg)
113122
except FileNotFoundError as err:
114-
self.logger.info('Created retriever for host %s'
115-
' but could not use cookiefile %s (%s)', self.hostname, cookiefile, err)
123+
LOG.info('Created retriever for host %s'
124+
' but could not use cookiefile %s (%s)', self.hostname, cookiefile, err)
116125
self.cookieFile = cookiefile
117-
self.logger.info('Created retriever for host %s using cookiefile %s', self.hostname, cookiefile)
126+
LOG.info('Created retriever for host %s using cookiefile %s', self.hostname, cookiefile)
118127
except OSError: # pragma: no cover
119-
self.logger.info('Created retriever for host %s'
120-
' but could not use cookiefile %s', self.hostname, cookiefile)
128+
LOG.info('Created retriever for host %s'
129+
' but could not use cookiefile %s', self.hostname, cookiefile)
121130
else:
122-
self.logger.info('Created retriever for host %s', self.hostname)
131+
LOG.info('Created retriever for host %s', self.hostname)
123132

124133
def __del__(self):
125134
if self.cookieFile:
126135
try:
127136
with open(self.cookieFile, 'w') as file:
128137
json.dump(requests.utils.dict_from_cookiejar(self.__session.cookies), file)
129-
self.logger.info('Writing cookiefile %s', self.cookieFile)
138+
LOG.info('Writing cookiefile %s', self.cookieFile)
130139
self.__cookiefd = None
131140
except ValueError as err: # pragma: no cover
132-
self.logger.info('Could not write cookiefile %s for host %s (%s)',
133-
self.__cookiefd.name, self.hostname, err)
141+
LOG.info('Could not write cookiefile %s for host %s (%s)',
142+
self.__cookiefd.name, self.hostname, err)
134143

135144
def __login(self):
136145
if self.loggedIn:
137146
indexPageRequest = self.__session.get(
138147
f'http://{self.hostname}/index.htm', timeout=self.requestTimeout)
139148
if 'RedirectToLoginPage' not in indexPageRequest.text:
140-
self.logger.info('Already logged in for %s', self.hostname)
149+
LOG.info('Already logged in for %s', self.hostname)
141150
return
142151
# lets start with a new session
143152
self.__session = requests.Session()
144153
self.loggedIn = False
145-
self.logger.info('Have to login again for %s due to inactive session', self.hostname)
154+
LOG.info('Have to login again for %s due to inactive session', self.hostname)
146155
loginPageRequest = self.__session.get(
147156
f'http://{self.hostname}/login.htm', timeout=self.requestTimeout)
148157
loginPageRequest.raise_for_status()
@@ -152,8 +161,8 @@ def __login(self):
152161
payload = None
153162
if len(rand) != 1:
154163
# looks like an old firmware without seed
155-
self.logger.warning('Your switch %s uses an old firmware which sends your password'
156-
' unencrypted while retrieving data. Please conscider updating', self.hostname)
164+
LOG.warning('Your switch %s uses an old firmware which sends your password'
165+
' unencrypted while retrieving data. Please conscider updating', self.hostname)
157166

158167
payload = {
159168
'password': self.password,
@@ -176,7 +185,7 @@ def __login(self):
176185
errorMsg = tree.xpath('//input[@id="err_msg"]/@value[1]')
177186
if errorMsg and errorMsg[0]:
178187
self.error = f'I could not login at the switch {self.hostname} due to: {errorMsg[0]}'
179-
self.logger.error(self.error)
188+
LOG.error(self.error)
180189
raise ConnectionRefusedError(self.error)
181190
self.loggedIn = True
182191

@@ -186,7 +195,7 @@ def __retrieveInfos(self): # noqa: C901
186195

187196
if 'RedirectToLoginPage' in infoRequest.text:
188197
self.error = 'Login failed for ' + self.hostname
189-
self.logger.error(self.error)
198+
LOG.error(self.error)
190199
raise ConnectionRefusedError(self.error)
191200
tree = html.fromstring(infoRequest.content)
192201
allinfos = tree.xpath('//table[@class="tableStyle"]//td[@nowrap=""]')
@@ -230,7 +239,7 @@ def __retrieveStatus(self):
230239

231240
if 'RedirectToLoginPage' in statusRequest.text:
232241
self.error = 'Login failed for ' + self.hostname
233-
self.logger.error(self.error)
242+
LOG.error(self.error)
234243
self.__infos = None
235244
raise ConnectionRefusedError(self.error)
236245

@@ -275,15 +284,15 @@ def __retrieveStatus(self):
275284
self.__status = [[speedmap[n] if i == 2 else n for i,
276285
n in enumerate(portStatus)] for portStatus in self.__status]
277286
break
278-
self.logger.info('Problem while retrieving status for %s'
279-
' this can happen when there is much traffic on the device', self.hostname)
287+
LOG.info('Problem while retrieving status for %s'
288+
' this can happen when there is much traffic on the device', self.hostname)
280289
retries -= 1
281290
if retries == 0:
282291
self.__status = None
283292
self.error = f'Could not retrieve correct status for {self.hostname} after {self.retries}' \
284293
' retries. This can happen when there is much traffic on the device, but it is more likely' \
285294
' that the firmware is not understood'
286-
self.logger.error(self.error)
295+
LOG.error(self.error)
287296
return False
288297
return True
289298

@@ -302,7 +311,7 @@ def __retrieveStatistics(self):
302311

303312
if 'RedirectToLoginPage' in statisticsRequest.text:
304313
self.error = f'Login failed for {self.hostname}'
305-
self.logger.error(self.error)
314+
LOG.error(self.error)
306315
self.__infos = None
307316
self.__status = None
308317
raise ConnectionRefusedError(self.error)
@@ -325,19 +334,19 @@ def __retrieveStatistics(self):
325334
noProblem = False
326335
if noProblem:
327336
break
328-
self.logger.info('Problem while retrieving statistics for %s'
329-
' this can happen when there is much traffic on the device', self.hostname)
337+
LOG.info('Problem while retrieving statistics for %s'
338+
' this can happen when there is much traffic on the device', self.hostname)
330339
retries -= 1
331340
if retries == 0:
332341
self.__statistics = None
333342
self.error = f'Could not retrieve correct statistics for {self.hostname} after {self.retries} retries.' \
334343
' This can happen when there is much traffic on the device'
335-
self.logger.error(self.error)
344+
LOG.error(self.error)
336345
return False
337346
return True
338347

339348
def retrieve(self):
340-
self.logger.info('Start retrieval for %s', self.hostname)
349+
LOG.info('Start retrieval for %s', self.hostname)
341350

342351
with mutex:
343352
self.error = ""
@@ -362,17 +371,17 @@ def retrieve(self):
362371
self.error = f'Result is not plausible for {self.hostname}' \
363372
' Different number of ports for statistics and status. This can happen when there is much' \
364373
' traffic on the device'
365-
self.logger.error(self.error)
374+
LOG.error(self.error)
366375
return
367376

368-
self.logger.info('Retrieval for %s done', self.hostname)
377+
LOG.info('Retrieval for %s done', self.hostname)
369378

370379
except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError):
371380
self.__infos = None
372381
self.__status = None
373382
self.__statistics = None
374383
self.error = f'Connection Error with host {self.hostname}'
375-
self.logger.error(self.error)
384+
LOG.error(self.error)
376385

377386
def writeResult(self): # noqa: C901
378387
result = ""
@@ -444,35 +453,20 @@ def main(endless=True, always_early_timeout=False): # noqa: C901
444453
description='Query Netgear ProSafe Switches using the web interface to provide statistics for Prometheus')
445454
parser.add_argument('config', type=argparse.FileType(
446455
'r'), help='configuration')
447-
parser.add_argument('-v', '--verbose',
448-
help='increase output verbosity', action='store_true')
456+
parser.add_argument('-v', '--verbose', action="append_const", const=-1,)
449457
parser.add_argument('--version', action='version',
450458
version='%(prog)s {version}'.format(version=__version__))
451459
args = parser.parse_args()
452460

453-
logger = logging.getLogger('ProSafe_Exporter')
454-
logger.setLevel(logging.INFO)
455-
456-
ch = logging.StreamHandler()
457-
458-
if args.verbose:
459-
ch.setLevel(logging.INFO)
460-
else:
461-
ch.setLevel(logging.WARNING)
462-
463-
# create formatter
464-
formatter = logging.Formatter(
465-
'%(asctime)s - %(name)s - %(levelname)s - %(message)s')
466-
467-
# add formatter to ch
468-
ch.setFormatter(formatter)
461+
logLevel = LOG_LEVELS.index(DEFAULT_LOG_LEVEL)
462+
for adjustment in args.verbose or ():
463+
logLevel = min(len(LOG_LEVELS) - 1, max(logLevel + adjustment, 0))
469464

470-
# add ch to logger
471-
logger.addHandler(ch)
465+
logging.basicConfig(level=LOG_LEVELS[logLevel])
472466

473467
config = yaml.load(args.config, Loader=yaml.SafeLoader)
474468
if not config:
475-
logger.error('Config empty or cannot be parsed')
469+
LOG.error('Config empty or cannot be parsed')
476470
sys.exit(3)
477471

478472
if 'global' not in config:
@@ -489,7 +483,7 @@ def main(endless=True, always_early_timeout=False): # noqa: C901
489483
config['global']['retries'] = 10
490484

491485
if 'switches' not in config or not config['switches']:
492-
logger.error(
486+
LOG.error(
493487
'You have to define switches in the switches: section of your configuration')
494488
sys.exit(4)
495489

@@ -499,11 +493,11 @@ def main(endless=True, always_early_timeout=False): # noqa: C901
499493
retrievers = list()
500494
for switch in config['switches']:
501495
if 'hostname' not in switch:
502-
logger.error(
496+
LOG.error(
503497
'You have to define the hostname for the switch, ignoring this switch entry')
504498
continue
505499
if 'password' not in switch:
506-
logger.error(
500+
LOG.error(
507501
'You have to define the password for the switch, ignoring this switch entry')
508502
continue
509503
if 'cookiefile' not in switch:
@@ -512,13 +506,11 @@ def main(endless=True, always_early_timeout=False): # noqa: C901
512506
ProSafeRetrieve(
513507
hostname=switch['hostname'],
514508
password=switch['password'],
515-
logger=logger,
516509
retries=config['global']['retries'],
517510
requestTimeout=config['global']['retrieve_timeout'],
518511
cookiefile=switch['cookiefile']))
519-
exporter = ProSafeExporter(retrievers=retrievers, logger=logger)
520-
exporter.run(host=config['global']['host'], port=config['global']['port'],
521-
retrieveInterval=config['global']['retrieve_interval'], debug=args.verbose, endless=endless)
512+
exporter = ProSafeExporter(retrievers=retrievers, retrieveInterval=config['global']['retrieve_interval'])
513+
exporter.run(host=config['global']['host'], port=config['global']['port'], debug=args.verbose, endless=endless)
522514
# Cleanup
523515
del exporter
524516
retrievers.clear()

0 commit comments

Comments
 (0)