|
4 | 4 |
|
5 | 5 | from pds4_tools import pds4_read
|
6 | 6 | from pds4_tools.reader.table_objects import TableManifest
|
7 |
| -import pds4_tools |
| 7 | +# import pds4_tools |
8 | 8 | from pathlib import Path
|
9 | 9 | from lxml import etree
|
10 |
| -import numpy as np |
11 | 10 | import pandas as pd
|
12 | 11 | import sys
|
13 | 12 | import yaml
|
14 | 13 | import os
|
| 14 | +import logging |
15 | 15 |
|
# Python 2/3 compatibility: prefer the fast C pickle on py2, fall back to
# the (already C-accelerated) stdlib pickle on py3.
try:
    import cPickle as pickle
except ImportError:
    # Only an ImportError means "not on Python 2" — a bare except here would
    # also hide unrelated startup failures.
    import pickle
|
20 |
| - |
21 | 20 |
|
22 |
| -import logging |
23 |
| -log = logging.getLogger(__name__) |
| 21 | +# set up module level logging |
24 | 22 |
|
# only show warning or higher messages from PDS4 tools
# NOTE(review): 'PDS4ToolsLogger' is assumed to be the logger name used by
# the pds4_tools package — confirm against that package's documentation.
pds4_logger = logging.getLogger('PDS4ToolsLogger')
pds4_logger.setLevel(logging.WARNING)
|
28 | 26 |
|
class DuplicateFilter(logging.Filter):
    """Logging filter that drops a record identical to the one just emitted.

    Two records are considered duplicates when they share module, level and
    (unformatted) message. State is kept on the filter instance, so a fresh
    instance starts by passing everything through.
    """

    def filter(self, record):
        """Return True to emit *record*, False to suppress a repeat."""
        fingerprint = (record.module, record.levelno, record.msg)
        if getattr(self, "last_log", None) == fingerprint:
            # Same module/level/message as the previous record -> drop it.
            return False
        # New (or first) message: remember it and let it through.
        self.last_log = fingerprint
        return True
| 37 | + |
# Module-level logger: timestamped messages to stdout, WARNING and above.
log = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
# `handler` is referenced elsewhere in this module (read_tables temporarily
# adds a DuplicateFilter to it), so it must remain a module-level name.
log.addHandler(handler)
log.setLevel(logging.WARNING)
| 44 | + |
| 45 | + |
# Locate the per-user config file: Windows APPDATA first, then the XDG
# config dir, finally ~/.config as the conventional fallback.
_config_dir = (os.environ.get('APPDATA')
               or os.environ.get('XDG_CONFIG_HOME')
               or os.path.join(os.environ['HOME'], '.config'))
default_config = os.path.join(_config_dir, "pds_dbase.yml")
|
34 | 51 |
|
| 52 | + |
35 | 53 | def index_products(directory='.', pattern='*.xml'):
|
36 | 54 | """
|
37 | 55 | Accepts a directory containing PDS4 products, indexes the labels and returns a
|
@@ -222,7 +240,7 @@ def _constructor(self):
|
222 | 240 | return pds4_df
|
223 | 241 |
|
224 | 242 |
|
225 |
| -def read_tables(label, label_directory='.', recursive=False, table_name=None, index_col=None, add_filename=False): |
| 243 | +def read_tables(label, label_directory='.', recursive=False, table_name=None, index_col=None, add_filename=False, quiet=False): |
226 | 244 | """
|
227 | 245 | Accepts a directory and file-pattern or list and attempts to load the specified table
|
228 | 246 | (or first table, if none is specified) into a merged DataFrane. If the tables
|
@@ -254,17 +272,22 @@ def read_tables(label, label_directory='.', recursive=False, table_name=None, in
|
254 | 272 | # de-dupe list
|
255 | 273 | file_list = list(set(file_list))
|
256 | 274 |
|
| 275 | + handler.addFilter(DuplicateFilter()) |
| 276 | + filter_inst = log.handlers[0].filters[-1] |
| 277 | + |
257 | 278 | for f in file_list:
|
258 | 279 | if table is None:
|
259 |
| - table = read_table(f, table_name=table_name, index_col=index_col) |
| 280 | + table = read_table(f, table_name=table_name, index_col=index_col, quiet=quiet) |
260 | 281 | if add_filename:
|
261 | 282 | table['filename'] = table.filename
|
262 | 283 | else:
|
263 |
| - temp_table = read_table(f, table_name=table_name, index_col=index_col) |
| 284 | + temp_table = read_table(f, table_name=table_name, index_col=index_col, quiet=quiet) |
264 | 285 | if add_filename:
|
265 | 286 | temp_table['filename'] = temp_table.filename
|
266 | 287 | table = table.append(temp_table)
|
267 | 288 |
|
| 289 | + handler.removeFilter(filter_inst) |
| 290 | + |
268 | 291 | table.sort_index(inplace=True)
|
269 | 292 |
|
270 | 293 | log.info('{:d} files read with {:d} total records'.format(len(file_list), len(table)))
|
|
0 commit comments