diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ac609638..60b00e349 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,6 @@ project(sbncode VERSION 10.04.06.01 LANGUAGES CXX) message(STATUS "\n\n ========================== ${PROJECT_NAME} ==========================") - include(CetCMakeEnv) cet_cmake_env() @@ -82,6 +81,7 @@ find_package( Boost COMPONENTS system filesystem REQUIRED ) include(ArtDictionary) include(ArtMake) include(BuildPlugins) +include(SBNutils) add_subdirectory(sbncode) add_subdirectory(fcl) diff --git a/fcl/CMakeLists.txt b/fcl/CMakeLists.txt index 3b6d71246..9a1d7be80 100644 --- a/fcl/CMakeLists.txt +++ b/fcl/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(gen) add_subdirectory(g4) add_subdirectory(caf) +add_subdirectory(util) install_fhicl() diff --git a/fcl/util/CMakeLists.txt b/fcl/util/CMakeLists.txt new file mode 100644 index 000000000..13355789a --- /dev/null +++ b/fcl/util/CMakeLists.txt @@ -0,0 +1 @@ +install_fhicl() diff --git a/fcl/util/dump_sbnjobmetadata.fcl b/fcl/util/dump_sbnjobmetadata.fcl new file mode 100644 index 000000000..2b14d3c68 --- /dev/null +++ b/fcl/util/dump_sbnjobmetadata.fcl @@ -0,0 +1,41 @@ +# +# File: dump_sbnjobmetadata.fcl +# Purpose: Prints on screen all the job SBN metadata stored in an input file. +# Author: Gianluca Petrillo (petrillo@slac.stanford.edu) +# Date: January 20, 2025 +# +# Reads and prints on screen the SBN job environment information from the input +# file. 
+# +# Changes +# -------- +# +# [20250120] (petrillo@slac.stanford.edu) +# original version +# + +#include "messages_icarus.fcl" + +process_name: JobMeta + +# use `icarus_message_services_interactive` to see dump on screen +services.message: @local::icarus_message_services_prod + +services.message.destinations.MetadataLog: { + type: file + filename: "JobEnvironment.log" + append: false + threshold: INFO + categories: { + DumpJobEnvironment: { limit: -1 } + default: { limit: 0 } + } +} + +outputs.metadataDumper: { + module_type: "DumpJobEnvironment" +} + +physics: { + streams: [ metadataDumper ] +} diff --git a/sbncode/Metadata/CMakeLists.txt b/sbncode/Metadata/CMakeLists.txt index 6cf9127e5..67cc0d27a 100644 --- a/sbncode/Metadata/CMakeLists.txt +++ b/sbncode/Metadata/CMakeLists.txt @@ -1,5 +1,21 @@ cet_enable_asserts() +GenerateRepoVersionSource(${CMAKE_PROJECT_NAME}) + +message(DEBUG "Generated source file '${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.h'") + +art_make_library( + LIBRARIES + sbnobj::Common_Metadata + larcorealg::headers + art::Persistency_Provenance + art::Utilities + canvas::canvas + messagefacility::MF_MessageLogger + fhiclcpp::fhiclcpp + ) + + set( sbn_meta_lib_list art::Framework_Core art::Framework_Principal art::Framework_Services_Registry @@ -21,6 +37,35 @@ cet_build_plugin( MetadataSBN art::service art::Framework_IO_detail ) +cet_build_plugin(SaveJobEnvironment art::ResultsProducer + LIBRARIES + sbncode::Metadata + sbnobj::Common_Metadata + art_root_io::TFileService_service + art_root_io::tfile_support + art::Framework_Services_Registry + ) + +# cet_build_plugin(DumpJobEnvironment art::ResultsProducer +# LIBRARIES +# sbnobj::Common_Metadata +# ) + +cet_build_plugin(DumpJobEnvironment art::Output + LIBRARIES + sbnobj::Common_Metadata + ) + +cet_build_plugin(sbncodeRepositoryVersion art::tool + LIBRARIES + sbncode::Metadata + sbnobj::Metadata + sbnobj::Common_Metadata + ) + + +install_headers(LIST 
"${CMAKE_CURRENT_BINARY_DIR}/${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.h") +install_source(LIST "${CMAKE_CURRENT_BINARY_DIR}/${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.cxx") install_headers() install_fhicl() install_source() diff --git a/sbncode/Metadata/DumpJobEnvironment_module.cc b/sbncode/Metadata/DumpJobEnvironment_module.cc new file mode 100644 index 000000000..93e4b8eb8 --- /dev/null +++ b/sbncode/Metadata/DumpJobEnvironment_module.cc @@ -0,0 +1,176 @@ +/** + * @file sbncode/Metadata/DumpJobEnvironment_module.cc + * @brief Producer module writing job environment information into output. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 16, 2025 + */ + +// local libraries +#include "sbnobj/Common/Metadata/JobEnvironmentInfo.h" + +// framework libraries +#include "art/Framework/Core/ModuleMacros.h" +#include "art/Framework/Core/OutputModule.h" +#include "art/Framework/Principal/ResultsPrincipal.h" +#include "art/Framework/Principal/Results.h" +#include "art/Framework/Principal/Provenance.h" +#include "art/Framework/Principal/Handle.h" +#include "art/Persistency/Provenance/ModuleContext.h" +#include "messagefacility/MessageLogger/MessageLogger.h" +#include "fhiclcpp/types/ConfigurationTable.h" // fhicl::WrappedTable +#include "fhiclcpp/types/TableFragment.h" +#include "fhiclcpp/types/Atom.h" + +// C++ standard libraries +#include +#include + + +// ----------------------------------------------------------------------------- +namespace sbn { class DumpJobEnvironment; } +/** + * @brief Output module dumping input versions to screen. + * + * The output module can be added to any of the end paths of an _art_ job to + * get a complete dump of the SBN job environment metadata stored into the input + * file. 
+ * + * For example: + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * process_name: JobMeta + * + * services.message.destinations.MetadataLog: { + * type: file + * filename: "JobEnvironment.log" + * append: false + * threshold: INFO + * categories: { + * DumpJobEnvironment: { limit: -1 } + * default: { limit: 0 } + * } + * } + * + * outputs.metadataDumper: { module_type: "DumpJobEnvironment" } + * + * physics.streams: [ metadataDumper ] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * will dump the metadata into a text file named `JobEnvironment.log`. + * + */ +class sbn::DumpJobEnvironment: public art::OutputModule { + public: + + /// Module configuration. + struct Config { + + using Name = fhicl::Name; + using Comment = fhicl::Comment; + + fhicl::TableFragment OutputModuleConfig; + + fhicl::Atom LogCategory { + Name{ "LogCategory" }, + Comment{ "name of the messagefacility output category to be used" }, + "DumpJobEnvironment" + }; + + }; // Config + + using Parameters + = fhicl::WrappedTable; + + explicit DumpJobEnvironment(Parameters const& params); + + private: + + // --- BEGIN --- Configuration ----------------------------------------------- + + std::string const fLogCategory; ///< Messagefacility category for the output. + + // --- END ----- Configuration ----------------------------------------------- + + + /// Dumps the information from an handle into the output stream. + template + void dumpInformation( + Stream& out, + art::Handle> const& infoHandle + ) const; + + + + void write(art::EventPrincipal&) override {} + void writeRun(art::RunPrincipal&) override {} + void writeSubRun(art::SubRunPrincipal&) override {} + + /// Reads and prints all the metadata data products. 
+ void readResults(art::ResultsPrincipal const& results) override; + +}; // sbn::DumpJobEnvironment + + +// ----------------------------------------------------------------------------- +// --- Implementation +// ----------------------------------------------------------------------------- +sbn::DumpJobEnvironment::DumpJobEnvironment(Parameters const& params) + : OutputModule{ params().OutputModuleConfig } + , fLogCategory{ params().LogCategory() } +{} + + +// ----------------------------------------------------------------------------- +void sbn::DumpJobEnvironment::readResults + (art::ResultsPrincipal const& principal) +{ + if (!principal.size()) return; + + art::ModuleContext const moduleContext{ moduleDescription() }; + art::Results const& results = principal.makeResults(moduleContext); + + std::vector>> infoHandles + = results.getMany>(); + + mf::LogInfo out{ fLogCategory }; + out << "Found " << infoHandles.size() << " job information entries in input."; + + for (art::Handle> const& infoHandle + : infoHandles + ) { + out << '\n' << std::string(80, '*') << '\n'; + dumpInformation(out, infoHandle); + } + +} // sbn::DumpJobEnvironment::readResults() + + +// ----------------------------------------------------------------------------- +template +void sbn::DumpJobEnvironment::dumpInformation( + Stream& out, + art::Handle> const& infoHandle +) const { + + art::Provenance const* provenance = infoHandle.provenance(); + if (provenance) { + out << "Information from '" << provenance->inputTag().encode() << "'"; + } + else { + out << "Information with unknown provenance"; + } + + if (infoHandle.isValid()) { + out << " from " << infoHandle->size() << " sources\n"; + for (sbn::JobEnvironmentInfo const& info: *infoHandle) { + out << std::string(80, '=') << '\n' << info; + } + } + else out << "\n[information not available]\n"; + +} // sbn::DumpJobEnvironment::dumpInformation() + + +// ----------------------------------------------------------------------------- 
+DEFINE_ART_MODULE(sbn::DumpJobEnvironment) + + +// ----------------------------------------------------------------------------- diff --git a/sbncode/Metadata/GITrepoVersion.cxx.in b/sbncode/Metadata/GITrepoVersion.cxx.in new file mode 100644 index 000000000..0d2a35157 --- /dev/null +++ b/sbncode/Metadata/GITrepoVersion.cxx.in @@ -0,0 +1,16 @@ +/** + * @file ${gitRepoVersionSourceStem}.cxx + * @brief GIT version for repository `${gitRepoName}`. + * @author automatically generated; + * template: Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * @see ${gitRepoVersionSourceStem}.h + */ + +#include "${gitRepoVersionSourceStem}.h" + +// ----------------------------------------------------------------------------- +const char RepositoryVersion_${gitRepoName}[] = "${gitRepoVersion}"; + + +// ----------------------------------------------------------------------------- diff --git a/sbncode/Metadata/GITrepoVersion.h.in b/sbncode/Metadata/GITrepoVersion.h.in new file mode 100644 index 000000000..a3210703d --- /dev/null +++ b/sbncode/Metadata/GITrepoVersion.h.in @@ -0,0 +1,18 @@ +/** + * @file ${gitRepoVersionSourceStem}.h + * @brief GIT version for repository `${gitRepoName}`. + * @author automatically generated; + * template: Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * @see ${gitRepoVersionSourceStem}.cxx + */ + +#ifndef ${gitRepoName}_${gitRepoVersionSourceStem}_H +#define ${gitRepoName}_${gitRepoVersionSourceStem}_H + + +/// Repository version for ${gitRepoName}. 
+extern const char RepositoryVersion_${gitRepoName}[]; + + +#endif // ${gitRepoName}_${gitRepoVersionSourceStem}_H diff --git a/sbncode/Metadata/JobEnvironmentInfoExtractor.cxx b/sbncode/Metadata/JobEnvironmentInfoExtractor.cxx new file mode 100644 index 000000000..4a7ec971f --- /dev/null +++ b/sbncode/Metadata/JobEnvironmentInfoExtractor.cxx @@ -0,0 +1,149 @@ +/** + * @file sbncode/Metadata/JobEnvironmentInfoExtractor.cxx + * @brief Algorithm extracting information from the job execution environment. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 16, 2025 + * @see sbncode/Metadata/JobEnvironmentInfoExtractor.h + */ + +// library header +#include "sbncode/Metadata/JobEnvironmentInfoExtractor.h" + +// SBN libraries +#include "sbncode/Metadata/RepositoryVersionReportUtils.h" +#include "sbncode/Metadata/RepositoryVersionReporter.h" + +// LArSoft libraries +#include "larcorealg/CoreUtils/enumerate.h" + +// framework libraries +#include "art/Persistency/Provenance/ModuleDescription.h" +#include "art/Utilities/make_tool.h" +#include "canvas/Utilities/Exception.h" +#include "messagefacility/MessageLogger/MessageLogger.h" +#include "fhiclcpp/ParameterSet.h" + +// C/C++ standard library +#include +#include +#include // std::move() + +// system libraries +#include // strchr() +#include // environ + + +// ----------------------------------------------------------------------------- +std::vector const +sbn::JobEnvironmentInfoExtractor::Config::DefaultDropVars = { + R"|(.*%%$)|" // BASH function pattern + , R"|(.*\(\)$)|" // another BASH function pattern +}; + + + +// ----------------------------------------------------------------------------- +sbn::JobEnvironmentInfoExtractor::JobEnvironmentInfoExtractor + (Config const& config /* = Config{} */) + : fDropVars{ prepareRegEx(config.dropVars) } + , fRepositoryReports{ config.repositories } + , fLogCategory{ config.logCategory } +{ + +} + + +// 
----------------------------------------------------------------------------- +sbn::JobEnvironmentInfo sbn::JobEnvironmentInfoExtractor::extract + (art::ModuleDescription const& moduleInfo) +{ + + sbn::JobEnvironmentInfo info; + + info.processName = moduleInfo.processName(); + info.artVersion = moduleInfo.releaseVersion(); + + info.sources = extractSourceVersions(); + + info.variables = extractEnvironmentVariables(); + + return info; + +} // sbn::JobEnvironmentInfoExtractor::extract() + + +// ----------------------------------------------------------------------------- +auto sbn::JobEnvironmentInfoExtractor::extractSourceVersions() const + -> sbn::OrderedPairList +{ + + // take advantage of the basic + sbn::OrderedPairList sourceVersions; + addVersionFromRepositories(sourceVersions, fRepositoryReports); + sourceVersions.finish(); + + return sourceVersions; + +} // sbn::JobEnvironmentInfoExtractor::extractSourceVersions() + + +// ----------------------------------------------------------------------------- +auto sbn::JobEnvironmentInfoExtractor::extractEnvironmentVariables() const + -> sbn::OrderedPairList +{ + + sbn::OrderedPairList vars; + + char** itemPtr = environ; + while (const char* item = *(itemPtr++)) { + + const char* sep = strchr(item, '='); + + std::string name, value; + if (sep) { + name = std::string{ item, sep++ }; + value = std::string{ sep }; + } + else { + mf::LogDebug{ fLogCategory } + << "Environment element '" << item << "' is not in name=value form."; + name = item; + } + + // apply drop filters + if (matchPatterns(name, fDropVars) < fDropVars.size()) + continue; + + vars.items.emplace_back(std::move(name), std::move(value)); + + } // while + + vars.finish(); + + return vars; + +} // sbn::JobEnvironmentInfoExtractor::extractEnvironmentVariables() + + +// ----------------------------------------------------------------------------- +std::size_t sbn::JobEnvironmentInfoExtractor::matchPatterns + (std::string const& value, std::vector const& 
patterns) +{ + for (auto const& [ iPattern, pattern ]: util::enumerate(patterns)) + if (std::regex_match(value, pattern)) return iPattern; + return patterns.size(); +} + + +// ----------------------------------------------------------------------------- +std::vector sbn::JobEnvironmentInfoExtractor::prepareRegEx + (std::vector const& patterns) +{ + std::vector regex; + regex.reserve(patterns.size()); + for (std::string const& pattern: patterns) regex.emplace_back(pattern); + return regex; +} + + +// ----------------------------------------------------------------------------- diff --git a/sbncode/Metadata/JobEnvironmentInfoExtractor.h b/sbncode/Metadata/JobEnvironmentInfoExtractor.h new file mode 100644 index 000000000..4c480a3a2 --- /dev/null +++ b/sbncode/Metadata/JobEnvironmentInfoExtractor.h @@ -0,0 +1,146 @@ +/** + * @file sbncode/Metadata/JobEnvironmentInfoExtractor.h + * @brief Algorithm extracting information from the job execution environment. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 16, 2025 + * @see sbncode/Metadata/JobEnvironmentInfoExtractor.cxx + */ + +#ifndef SBNCODE_METADATA_JOBENVIRONMENTINFOEXTRACTOR_H +#define SBNCODE_METADATA_JOBENVIRONMENTINFOEXTRACTOR_H + + +// local libraries +#include "sbnobj/Common/Metadata/JobEnvironmentInfo.h" + +// C++ standard libraries +#include +#include +#include + + +// ----------------------------------------------------------------------------- +namespace art { class ModuleDescription; } // forward declarations + +// ----------------------------------------------------------------------------- +namespace sbn { class JobEnvironmentInfoExtractor; } +/** + * @brief Extracts job execution environment information. 
+ * + * In its simplest form, the information is saved into a + * `sbn::JobEnvironmentInfo` object: + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp} + * sbn::JobEnvironmentInfoExtractor extractor; + * sbn::JobEnvironmentInfo const info = extractor(moduleDescription()); + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The `moduleDescription()` is a _art_ module member function, and as such this + * call is bound to the _art_ framework. However, if the context does not allow + * for a `art::ModuleDescription` object, partial information can still be + * retrieved which does not need that object. + * + * + * Configuration + * -------------- + * + * The configuration object, `Config`, includes the following configurable + * elements: + * * `dropVars`: a list of regular expression (`std::regex`) patterns to be + * matched to the environment variable names; if the name of a variable + * matches any of the the full patterns specified here, its value is not + * included in the metadata. This is useful to exclude known irrelevant + * variables (for example, shell functions). + * * `repositories`: a list of names of repositories to query for their + * version. For each repository ``, an attempt to load a _art_ tool + * named `RepositoryVersion` with an empty configuration parameter + * set is attempted, and on success all the information obtained from the + * tool is integrated into the metadata. + * * `logCategory`: this algorithm uses messagefacility library to emit + * messages to console. This is the category these messages are assigned to. + * + * + */ +class sbn::JobEnvironmentInfoExtractor { + + public: + + /// Configuration record. + struct Config { + + /// Default value of `dropVars`. + static std::vector const DefaultDropVars; + + /// Remove environment variables with names matching these patterns. + std::vector dropVars = DefaultDropVars; + + /// Look for the GIT version of these repositories. 
+ std::vector repositories{ "sbncode" }; + + std::string logCategory = "JobEnvironmentInfoExtractor"; + + // needed because of compiler bugs + // https://bugs.llvm.org/show_bug.cgi?id=36684 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96645 + Config() {} + + }; // Config + + + /// Constructor with configuration. + JobEnvironmentInfoExtractor(Config const& config = Config{}); + + /// Extracts all the information from the environment. + sbn::JobEnvironmentInfo extract + (art::ModuleDescription const& moduleInfo); // not guaranteed to be const! + + /** + * @brief Extracts all the information from the environment. + * @param moduleInfo information about the _art_ module being executed + * @return an object with all extracted metadata + */ + sbn::JobEnvironmentInfo operator() (art::ModuleDescription const& moduleInfo) + { return extract(moduleInfo); } + + // --- BEGIN --- Partial metadata extraction ------------------------------- + /** + * @name Partial metadata extraction + * + * The member functions in this group allow the extraction of part of the job + * metadata. The function `extract()` includes the information from all of + * them, plus some more. + */ + /// @{ + + /// Returns the environment variables. + sbn::OrderedPairList extractEnvironmentVariables() const; + + /// Returns the version of the source repositories. + sbn::OrderedPairList extractSourceVersions() const; + + /// @} + // --- END ----- Partial metadata extraction ------------------------------- + + private: + + std::vector fDropVars; ///< Remove variables with matching names. + + ///< Names of the repositories to ask for a version report to. + std::vector fRepositoryReports; + + std::string fLogCategory; + + /// Converts all the patterns into regular expression objects. + static std::vector prepareRegEx + (std::vector const& patterns); + + /// @brief Returns the index of the first pattern `value` matches. 
+ /// @return index of the first matched pattern, `patterns.size()` if none + static std::size_t matchPatterns + (std::string const& value, std::vector const& patterns); + +}; // sbn::JobEnvironmentInfoExtractor + + +// ----------------------------------------------------------------------------- + +#endif // SBNCODE_METADATA_JOBENVIRONMENTINFOEXTRACTOR_H diff --git a/sbncode/Metadata/RepositoryVersionReportUtils.cpp b/sbncode/Metadata/RepositoryVersionReportUtils.cpp new file mode 100644 index 000000000..5e9665657 --- /dev/null +++ b/sbncode/Metadata/RepositoryVersionReportUtils.cpp @@ -0,0 +1,71 @@ +/** + * @file sbncode/Metadata/RepositoryVersionReportUtils.cxx + * @brief Interface for _art_ tool reporting the version of packages. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * @see sbncode/Metadata/RepositoryVersionReportUtils.h + */ + +// library header +#include "sbncode/Metadata/RepositoryVersionReportUtils.h" + +// SBN libraries +#include "sbncode/Metadata/RepositoryVersionReporter.h" + +// framework libraries +#include "art/Utilities/make_tool.h" +#include "canvas/Utilities/Exception.h" +#include "messagefacility/MessageLogger/MessageLogger.h" +#include "fhiclcpp/ParameterSet.h" + +// C/C++ standard library +#include // std::unique_ptr<> + + +// ----------------------------------------------------------------------------- +bool sbn::addVersionFromRepository + (sbn::OrderedPairList& versionList, std::string const& repoName) +{ + + std::string const toolName = repoName + "RepositoryVersion"; + + fhicl::ParameterSet config; + config.put("tool_type", toolName); + + std::unique_ptr reportTool; + try { + reportTool = art::make_tool(config); + } + catch(cet::exception const& e) { + mf::LogWarning{ "RepositoryVersionReporter" } + << "No report from repository '" << repoName << "': tool not found."; + mf::LogDebug{ "RepositoryVersionReporter" } << "Error:\n" << e; + return false; + } + + // blindly add everything + 
versionList.items.insert + (versionList.items.end(), reportTool->begin(), reportTool->end()); + + return true; + +} // sbn::addVersionFromRepository() + + +// ----------------------------------------------------------------------------- +std::vector sbn::addVersionFromRepositories( + sbn::OrderedPairList& versionList, std::vector const& repoNames +) { + + std::vector missingRepos; + for (std::string const& repoName: repoNames) { + if (addVersionFromRepository(versionList, repoName)) continue; + missingRepos.push_back(repoName); + } + + return missingRepos; + +} // sbn::addVersionFromRepositories() + + +// ----------------------------------------------------------------------------- diff --git a/sbncode/Metadata/RepositoryVersionReportUtils.h b/sbncode/Metadata/RepositoryVersionReportUtils.h new file mode 100644 index 000000000..1c7cea867 --- /dev/null +++ b/sbncode/Metadata/RepositoryVersionReportUtils.h @@ -0,0 +1,60 @@ +/** + * @file sbncode/Metadata/RepositoryVersionReportUtils.h + * @brief Interface for _art_ tool reporting the version of packages. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * @see sbncode/Metadata/RepositoryVersionReportUtils.cxx + */ + +#ifndef SBNCODE_METADATA_REPOSITORYVERSIONREPORTUTILS_H +#define SBNCODE_METADATA_REPOSITORYVERSIONREPORTUTILS_H + + +// SBN libraries +#include "sbnobj/Common/Metadata/OrderedPairList.h" + +// C/C++ standard library +#include +#include + + +// ----------------------------------------------------------------------------- +namespace sbn { + + /** + * @brief Adds versions from the specified repository. + * @param[out] versionList version list to add to + * @param repoName name of the repository to be queried + * @return whether the repository plugin could be loaded + * + * The plugin with name `repoName + "RepositoryVersion"` is loaded and its + * report is appended to `versionList`. 
+ * It will be necessary to call `versionList.finish()` after all the additions + * are done. + * + * This utility is used in the + * @ref SBNsourceMetadataSystem "repository version tracking system". + */ + bool addVersionFromRepository + (sbn::OrderedPairList& versionList, std::string const& repoName); + + + /** + * @brief Adds versions from the specified repositories. + * @param[out] versionList version list to add to + * @param repoNames name of the repositories to be queried + * @return which repositories which could not be reached + * @see addVersionFromRepository + * + * This is equivalent to repeatedly calling `addVersionFromRepository()`. + */ + std::vector addVersionFromRepositories( + sbn::OrderedPairList& versionList, std::vector const& repoNames + ); + +} // namespace sbn + + +// ----------------------------------------------------------------------------- + +#endif // SBNCODE_METADATA_REPOSITORYVERSIONREPORTUTILS_H diff --git a/sbncode/Metadata/RepositoryVersionReporter.h b/sbncode/Metadata/RepositoryVersionReporter.h new file mode 100644 index 000000000..55e2e05bf --- /dev/null +++ b/sbncode/Metadata/RepositoryVersionReporter.h @@ -0,0 +1,75 @@ +/** + * @file sbncode/Metadata/RepositoryVersionReporter.h + * @brief Interface for _art_ tool reporting the version of packages. + * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * + */ + +#ifndef SBNCODE_METADATA_REPOSITORYVERSIONREPORTER_H +#define SBNCODE_METADATA_REPOSITORYVERSIONREPORTER_H + + +// SBN libraries +#include "sbnobj/Common/Metadata/OrderedPairList.h" + +// C/C++ standard library +#include // std::distance() + + +// ----------------------------------------------------------------------------- +namespace sbn { class RepositoryVersionReporter; } +/** + * @brief Interface for _art_ tool reporting the version of packages. 
+ * + * An object reporting package versions will return them as a range of pairs + * package name/package version, each element being a string. + * + * This interface is not at all generic, and the iterators that are returned + * are exactly `const_iterator` ones defined in this interface, which are + * iterators of the collection `vector_t` also defined here, which is exactly + * a `std::vector`. + * + * @note At this point there is no polymorphic interface: derived classes will + * fetch their data at construction time and keep it for anybody curious + * to learn it. In the future an actual virtual interface could become + * necessary, and at that point it would be clearer what it should + * contain. + * + */ +struct sbn::RepositoryVersionReporter { + + /// Type of the name of a package (a `std::string`). + using PackageName_t = sbn::OrderedPairList::Key_t; + + /// Type of the version of a package (a `std::string`). + using PackegaVersion_t = sbn::OrderedPairList::Value_t; + + // standard C++ container types + using value_type = sbn::OrderedPairList::value_type; + using size_type = sbn::OrderedPairList::size_type; + using const_iterator = sbn::OrderedPairList::const_iterator; + + + /// Collected packages and their versions. + sbn::OrderedPairList packageVersions; + + + /// Begin-iterator for the package-version pairs. + const_iterator begin() const { return packageVersions.begin(); } + + /// End-iterator for the package-version pairs. + const_iterator end() const { return packageVersions.end(); } + + /// Returns whether there is no entry in the report. + bool empty() const noexcept { return begin() == end(); } + + /// Returns the number of items in the report. 
+ size_type size() const { return std::distance(begin(), end()); } + +}; // sbn::RepositoryVersionReporter + + +// ----------------------------------------------------------------------------- + +#endif // SBNCODE_METADATA_REPOSITORYVERSIONREPORTER_H diff --git a/sbncode/Metadata/SBNsourceMetadataSystem.dox b/sbncode/Metadata/SBNsourceMetadataSystem.dox new file mode 100644 index 000000000..fa843ba25 --- /dev/null +++ b/sbncode/Metadata/SBNsourceMetadataSystem.dox @@ -0,0 +1,367 @@ +/** @page SBNsourceMetadataSystem Source code and execution environment metadata system in SBN code +@author Gianluca Petrillo (petrillo@slac.stanford.edu) + + +At highest level, the plugin `SaveJobEnvironment` allows storage of metadata +into the _art_/ROOT and `TFileService` files, and the output module +`DumpJobEnvironment` prints the information saved in a _art_/ROOT file on +console. + +The system to achieve that result is quite convoluted, and this documentation +will describe that in detail. + + +Which information is extracted +=============================== + +The extraction algorithm, `sbn::JobEnvironmentInfoExtractor`, includes +information ("metadata") about: + * The execution environment of the job: + * the complete dump of environment variables (except the ones identified as + shell functions); when using UPS, this can be used to track the version + of the products (UPS packages) set up during the job. + * The GIT version of the repositories used during the build process of the + code for some selected source code repositories. + * Information from the software being executed: + * _art_ process name; + * _art_ version. + +This information is extracted for each _art_ process; since typically each final +output file is the result of a sequence of jobs, the output files will contain +multiple metadata sets, for each of the input and job that ended into the file +data. 
+ + +How the information is stored for later use +============================================ + +The goal of the system is to save the metadata into each output file, so that it +is always known wherever the data is. + +Our current data system includes four output carriers: + 1. _art_/ROOT files, managed directly by _art_ and with a rigid format; + 2. `TFileService` supplemental ROOT file, also managed by _art_ but with a + format that is freer; + 3. LArCV files shipped to SPINE reconstruction framework; + 4. Common Analysis Format (CAF), managed by SBN via a _art_ module. + +At the moment of writing (at the inception of this system), the metadata is only +written into the _art_/ROOT and the `TFileService` output files. + +In addition, the system needs a way to store some of the information that would +not be available at run time: an example is the version of the software included +in the build, which is usually a GIT tag that does not survive to the +distributed binary code. +The options of where to keep this information include: as a executable code +(library) that software can link to and call/read; FHiCL configuration that jobs +can include; simple text files that jobs can read. + +There are limitations and shortcomings in each of the options. +The choice of this system was to include the version into an executable library. +The reason why the text file was not chosen is that it requires a convention of +where the file can be found, which is not trivial given the relocation of +distributed code. +The reason why FHiCL configuration was not preferred is that only _art_/ROOT +files automatically include that information. In addition, each job +configuration needs to know _exactly_ which repositories are being included, +since FHiCL parser does not tolerate missing information. The first limitation +can be worked around, for example with the experimental `icaruscode` module +`SaveConfigurationIntoTFile`, which was never fully completed or tested. 
+The selected option, library code, has ways to address these issues. There is +already a tracking system in place for the libraries, and _art_ has a "factory" +system that can find by name libraries (plug-ins) to be loaded dynamically. +As for the comparison with FHiCL, the code can be configured to look for a large +number of repository libraries, and if one is missing, the resulting error +(for example a thrown C++ exception) can be programmatically caught and handled. +The limitation of this option will be discussed later in this section. + +The difficulty of this system is that it needs to connect very heterogeneous +information sources, which are never available all at the same time. +The source repository is available only in the build stage, while the execution +environment is present only... well, during the job execution. +The strategy is then to create one shared library for each repository, with +a standard name (`RepositoryVersion_<RepoName>`), with the repository +version information, and to have some modular software that can read these +libraries, collect their information in a single place and then write it. +This is how it was implemented for the single repository named `<RepoName>`: + 1. CMake discovers the name of the repository from its remote source + (`git remote get-url origin`). + 2. CMake executes a `git describe` call (`git` executable must be available). + 3. CMake generates a C++ header and source file via `configure_file()` starting + from two template files (`GITrepoVersion.{h,cxx}.in`), and creates the + source files `RepositoryVersion_<RepoName>.{h,cxx}`. + 4. CMake creates the local directory library as usual + (e.g. `art_make_library`), which will include the two generated files too. + 5. The two generated files are added to the list of files to install. + +This happens in the source directory that is going to be hosting the library, by +convention `<RepoName>/Metadata` (e.g. in `sbncode/Metadata/CMakeLists.txt`). 
+The first three steps are collected in macros saved in `SBNutils.cmake`, which +must be included. + +A relevant question with this method is where the `SBNutils.cmake` macro library +should be hosted. The chosen repository must be at the root of the dependencies +of all the repositories that we need to version, but there is no such root: +`sbnobj` (hosting _art_/ROOT data products) depends only on `lardataobj`, and +`sbnanaobj` (CAF data objects) depends only on ROOT (so, ironically, the root +dependency in our source tree _is_ in fact ROOT, but unfortunately it does not +make sense to add this infrastructure in there). The practical workaround to +this riddle is to duplicate the macro file, with obvious maintenance +consequences. + +@note An **important limitation** of this system is that the information is + extracted by CMake, when the build system decides to run CMake and when + CMake decides to (re)generate the files -- which may be never: at the + moment of writing, the details are not yet clear. + To make sure that the GIT versions are correct, the only safe way is to + zap the area (`mrb z`) and build everything from scratch. This is always + a good idea anyway after one thinks the code is ready for prime time. + Again, remember: after code is committed to GIT, a full rebuild is + likely needed for the code version metadata to reflect the new repository + status. + +@note The presence of an actual library to be dynamically linked to is necessary + and can't be replaced by inline definitions in the header files. + The reason is that the system needs to support some level of delegation + (see the next section) where a package needs to provide information about + a different one which can't provide it on its own. For example, `sbnobj` can extract + and store its own version, but it lacks the dependencies necessary to + provide it to _art_. Therefore, `sbncode` will be in charge of saving + `sbnobj` metadata in addition to its own.
Given this structure, if + `sbnobj` version is stored in a header, `sbncode` will learn it at compile + time and keep it hard-coded. It is possible though that the job loads an + updated `sbnobj` instead of the one used during `sbncode` build, in which + case `sbncode` would still report the version hard-coded at the time of + its own build. + This practice is prone to many issues (the scenario described here + outlines a common one) and should be avoided; however, the metadata system + implemented here is a diagnostic tool and as such should be designed to + be more resilient than average. + + +How to arrange a repository with a version library +--------------------------------------------------- + +1. `SBNutils.cmake` needs to be included. It is stored, willy-nilly, in + `sbnobj`. The most obvious option is to add things into the main + `CMakeLists.txt` file of the repository; namely: + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + find_package( sbnobj REQUIRED ) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + in the `find_package` section, if not present already, and + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + include(SBNutils) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + just after the inclusion of CETmodules/_art_ macro files and definitely + _before_ the `add_subdirectory()` calls. +2. Check the directory `<repo>/Metadata`. + 1. If it does not exist or does not contain a `CMakeLists.txt`: + 1. create it, and add the line + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + add_subdirectory(Metadata) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + to the `<repo>/CMakeLists.txt` (in the same directory where + the new `Metadata` directory is); + 2. copy into there from `sbnobj/Metadata` the file `CMakeLists.txt`, + which should be portable enough; or add lines as below. + 2. 
If it does exist, add to its `CMakeLists.txt`: + 1. at the top of the file (before the build commands): + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + GenerateRepoVersionSource(${CMAKE_PROJECT_NAME}) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + which calls the source generation macro from SBNutils; + 2. at the end add to the usual `install_Xxxx()` calls these ones: + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + install_headers(LIST "${CMAKE_CURRENT_BINARY_DIR}/${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.h") + install_source(LIST "${CMAKE_CURRENT_BINARY_DIR}/${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.cxx") + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + which add the `.cxx` and `.h` files to the files to install; + 3. make sure that there is an `art_make_library()` call or equivalent + build command in the file; if there is none, or if there are only + plugin or dictionary build calls (e.g. `cet_build_plugin()`), add + explicitly a + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + art_make_library() + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + or a more explicit (and _art_-independent) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + cet_make_library( + SOURCE + "${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.h" + "${${CMAKE_PROJECT_NAME}_GIT_REPO_VERSION_SOURCE}.cxx" + ) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + which will create a library with only the source file generated here. +3. Copy into there from `sbnobj/Metadata` the files `GITrepoVersion.{h,cxx}.in`. + Or write some custom ones (but remember that the _art_ tool needs the + implementation details). + +Note that in principle every `${CMAKE_PROJECT_NAME}` above can be replaced with +the actual CMake project name, hard-coded (e.g. `sbncode`). 
The only drawback is +that such lines need further editing when copied into other repositories. + + + +How the information is extracted and serialized in the output +============================================================== + +When a job is executed, the version of all the repositories should ideally be +loaded and then saved into all the relevant output files. +The strategy for _art_ jobs (`lar`) is no less complex than the one described in +the previous paragraph. The choice was to save the information into an _art_ data +product. +Among the complications: + * The data product should be stored to have the least possible duplication. + * Metadata from the input files needs to be propagated into the output files; + and there can be multiple input files, multiple output files, and not in a + one-to-one correspondence. + +This is the general strategy: + + 1. Metadata is stored into a _results-level data product_, which is an + output-file-level data product (as opposed to run-level, subrun-level or + event-level) bound to the output module (e.g. `RootOutput`). + The class of the data product is defined in `sbnobj`. + 2. A plugin will take care of writing the metadata as a data product. This + plugin is specific to output modules like `RootOutput` and is somewhat + different from, but similar to, _art_ modules. The plugin also has the task of + attempting to collect the relevant information from all the possible + source repositories, as listed in the FHiCL configuration of the plugin. + If the information from a source repository is not available, that source is + simply not included in the metadata. The output plugin is defined in + `sbncode`, which is the lowest _art_-aware repository under our control. + 3. The plugin, called `SaveJobEnvironment`, calls an extractor utility, + `sbn::JobEnvironmentInfoExtractor`, which utilizes _art_ tools to + dynamically load information from all the source repositories. + Tools have conventional names (`<repo>RepositoryVersion`). + 4. 
Each tool is given the chance to provide arbitrary metadata, but they are + expected to fill in the name and version of their repository. + +The factorization of the extractor utility from the output plugin allows the +utility to be used also in other contexts, for example when running CAF maker. However, +because of the use of the _art_ tool infrastructure, it still requires _art_ to +be functional. In addition, some of the information is extracted from _art_ +itself (e.g. the process name), so _art_ is also necessary as an information source +if the full information is desired. + +A limitation of the _art_-tool-based strategy is that the repository containing +the tool must depend on _art_. Several source repositories do not fulfil this +requirement, by design; e.g. `sbnobj` and `icarusalg`. These repositories still +need to be instrumented with CMake instructions to generate the repository +version library (repositories which do not create libraries are very rare), and +one of the plugins of the dependent repositories must be in charge of reading +the version of these repositories together with their own. So for example +`sbnobj` will generate a `sbnobj_Metadata` library, and _art_ tool +`sbncodeRepositoryVersion` in `sbncode` will link to both `sbncode::Metadata` +for the version of `sbncode`, and to `sbnobj::Metadata` for the version of +the non-art-aware repository it depends on, `sbnobj`. Since the dependencies of +each repository are known and quite stable, the tool can hard-code them. +In addition, if a dependency is _art_-aware, the tool can take care of the +dependency subtree for that dependency recursively, by calling the tool of that +dependency (duplicate metadata entries will be expected to be the same anyway). + + +### Results-level products + +Results-level products were introduced in _art_ as a means to have data products +spanning multiple runs. 
While some of their interface is similar to that of the other principals +(`art::Event`, `art::Run`), their usage and accessibility are deeply different. + +The key point is this: **only output modules can access Results-level data +products** (not only `RootOutput` specifically). +This comes with some obvious and explicit clumsiness: there is no such +thing as an _art_ module writing a Results-level data product. In fact, there is no +such thing as an _art_ module _reading_ that data product either. +The `DumpJobEnvironment` facility offered here is in fact an _output module_: +producer and analysis modules can't learn the versions that the version +data products in the input files hold. They can however use the same algorithm +as `SaveJobEnvironment` uses (that is another output plugin) to extract the +versions it then saves in the Results-type data product: the information +extraction algorithm is factorized as `sbn::JobEnvironmentInfoExtractor` and its +return type is a class `sbn::JobEnvironmentInfo` (`sbnobj`) well suited to be +an _art_ data product. + + + +How to arrange a repository with the tool +------------------------------------------ + +The tools will depend on the repository version libraries of the current package +and its dependencies, and on _art_. It is recommended to place their source code +in the same directory as the repository version library. +Step by step: + + 1. Utilize the `<repo>/Metadata` directory where the version library + source code should already be. In that case, the name of that library for + linking will be `<repo>::Metadata`. + 2. Create the tool source `<repo>RepositoryVersion_tool.cc` by + copying the source code from + `sbncode/Metadata/sbncodeRepositoryVersion_tool.cc` or + `icaruscode/Metadata/icaruscodeRepositoryVersion_tool.cc`. + 3. 
Add to the `CMakeLists.txt` file instructions to build the tool: + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cmake} + cet_build_plugin(<repo>RepositoryVersion art::tool + LIBRARIES + <repo>::Metadata + sbncode::Metadata + sbnobj::Common_Metadata + ) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Additional libraries may be needed for the dependencies. + 4. Customize the tool source. + * If this repository _has_ dependent non-art repositories, add them to the + tool (the one in `icaruscode`, `icaruscodeRepositoryVersion`, already + shows how, with `icarusalg`): link to the dependent version libraries, + and add metadata entries for them. + * If this repository _is_ a dependency of another _art_-aware repository, + consider updating the tool in that repository to load this one and + store information from it. The tool in `icaruscode`, + `icaruscodeRepositoryVersion`, shows how (with `sbncode`). + 5. In the relevant configurations of the output plugin `SaveJobEnvironment`, + add the name of the repository the tool is covering (the `<repo>` + above). + + +How metadata can be accessed from the input files +================================================== + +The plugin `SaveJobEnvironment` can write the metadata into the _art_/ROOT file +and into the `TFileService` output file. + +The metadata in the _art_/ROOT file is stored as a Results-level data product, +and can therefore be read only by output modules. That does mean that producer +and analyzer _art_ modules will not be able to access that metadata. +However, an output module `DumpJobEnvironment` is provided that dumps the full +metadata on console output (configurable via `message` service). 
+The configuration of that module is as simple as: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +outputs.metadataDumper: { + module_type: "DumpJobEnvironment" +} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +and, as usual, `metadataDumper` must be added in one of the end paths in the +`physics` configuration table. This type of configuration is provided in +`sbncode` as `dump_sbnjobmetadata.fcl`. + +Metadata saved into a `TFileService` ROOT file will be in the main ROOT +directory of the file. See the documentation of `SaveJobEnvironment` plugin +for the details. + + + +Future improvements +==================== + +* Currently the metadata seen from all input so far is stored into the current + output file; it may happen then that an output file stores metadata from an + input file that was already closed when the output file started being filled. + That metadata has no reason to reside in that output file. +* There is going to be a large amount of duplication: all input files likely + have almost identical metadata, but we save each individually. A system to + collect the shared metadata in a single list may be desirable to reduce the + metadata size. +* If there is demand, the system can be extended to create also a FHiCL + configuration with the version of the repository. + + + */ diff --git a/sbncode/Metadata/SaveJobEnvironment_plugin.cc b/sbncode/Metadata/SaveJobEnvironment_plugin.cc new file mode 100644 index 000000000..70364dc4d --- /dev/null +++ b/sbncode/Metadata/SaveJobEnvironment_plugin.cc @@ -0,0 +1,331 @@ +/** + * @file sbncode/Metadata/SaveJobEnvironment_plugin.cc + * @brief Results producer plugin writing job environment information into output. 
+ * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 16, 2025 + */ + +// SBN libraries +#include "sbncode/Metadata/JobEnvironmentInfoExtractor.h" +#include "sbnobj/Common/Metadata/JobEnvironmentInfo.h" + +// framework libraries +#include "art_root_io/TFileService.h" +#include "art_root_io/TFileDirectory.h" +#include "art/Framework/Core/ModuleMacros.h" +#include "art/Framework/Core/ResultsProducer.h" +#include "art/Framework/Principal/Results.h" +#include "art/Framework/Principal/Handle.h" +#include "canvas/Utilities/Exception.h" +#include "messagefacility/MessageLogger/MessageLogger.h" +#include "fhiclcpp/types/Sequence.h" +#include "fhiclcpp/types/OptionalAtom.h" + +// ROOT libraries +#include "TFile.h" +#include "TDirectory.h" + +// C/C++ libraries +#include // std::back_inserter() +#include // std::make_unique() +#include +#include // std::move() +#include + + + +// ----------------------------------------------------------------------------- +namespace sbn { class SaveJobEnvironment; } +/** + * @brief Writes information from the job execution environment into output. + * + * This result-level module writes into the `art::Results` an object with + * information about the environment where the job is being executed. + * + * Information is extracted by the `sbn::JobEnvironmentInfoExtractor` algorithm. + * + * To add a new repository to the source of metadata collected by this plugin, + * see the instructions and explanations in + * @ref SBNsourceMetadataSystem "the SBN metadata system documentation". + * + * + * Output + * ------- + * + * In _art_/ROOT output file, `art::Results` level: + * * `std::vector` objects including all the ones + * found in the input files, and in addition, for this job: + * * a snapshot of the full environment (like in `getEnv()` C function), + * lexicographically sorted as for the object requirement. 
+ * + * The information from this job is also saved in the `TFileService` output + * file, if that service is configured. Input information is currently not + * written into the `TFileService` output. Also note that the information is + * saved in the main directory of the file, as opposed to the usual subdirectory + * named after the module. The name of the object is `EnvInfo`, and, if + * `sbn::JobEnvironmentInfo` dictionary library is available to ROOT, one simple + * way to see it is to open the ROOT file with the ROOT interpreter and execute + * `EnvInfo->dump(std::cout);`. + * + * + * Module configuration + * --------------------- + * + * * `Repositories` (list of strings; default: see `DefaultRepositories`): + * the list of repositories whose version reporting tool will be queried. + * Directly passed as `JobEnvironmentInfoExtractor::Config::repositories` + * (see) to the extraction algorithm. + * * `WriteToTFileService` (flag, optional): if set to `false`, the information + * will not be written into the `TFileService` output file; if it is set to + * `true`, it will be written into the output file of the service, which is + * required to be configured; if the parameter is omitted, the information + * will be written only if the service is available. + * + * + * Service dependencies + * --------------------- + * + * * `TFileService` (required only if `WriteToTFileService` is set to `true`) + * for output into the `TFile` managed by that service. + * + */ +class sbn::SaveJobEnvironment: public art::ResultsProducer { + + public: + + /// List of repository tools to be loaded by default. 
+ static std::vector const DefaultRepositories; + + struct Config { + + using Name = fhicl::Name; + using Comment = fhicl::Comment; + + fhicl::Sequence Repositories{ + Name{ "Repositories" }, + Comment{ "Names of the repositories with version reporting tools" }, + DefaultRepositories + }; + + fhicl::OptionalAtom WriteToTFileService{ + Name{ "WriteToTFileService" }, + Comment + { "enables or disables writing to TFileService (default: if available)" } + }; + + }; // Config + + using Parameters = Table; + + + /// Constructor. + SaveJobEnvironment(Parameters const& params); + + /// Fetches the information. + virtual void beginJob() override; + + /// Reads the information on each new input file. + virtual void readResults(art::Results const& results) override; + + /// Writes the information at output closure time. + virtual void writeResults(art::Results& results) override; + + /// Clears after writing: does nothing. + virtual void clear() override; + + /// Write information to `TFileService` file if available. + virtual void endJob() override; + + + private: + + // --- BEGIN --- Configuration ----------------------------------------------- + + /// Names of the repositories to query for version metadata. + std::vector const fRepositoryNames; + + bool const fWriteToTFileService; ///< Whether to write info to `TFileService`. + + // --- END ----- Configuration ----------------------------------------------- + + + sbn::JobEnvironmentInfoExtractor fInfoExtractor; ///< Extraction algorithm. + + /// All information from the input to be written, in the order it was read. + std::vector fInputInfo; + + /// Information from this job to be written. + sbn::JobEnvironmentInfo fJobInfo; + + + /// Fetches and returns all the information. + sbn::JobEnvironmentInfo fetchInformation(); + + + /// Writes the current information into `outDir`. + void writeInformationToTFile(TDirectory& outDir) const; + + /// Returns a correctly configured information extractor algorithm. 
+ sbn::JobEnvironmentInfoExtractor makeInfoExtractor() const; + + + /// Returns whether we should write to `TFileService`. + static bool parseWriteToTFileService + (std::optional writeToTFileService); + + +}; // sbn::SaveJobEnvironment + + +// ----------------------------------------------------------------------------- +// --- Implementation +// ----------------------------------------------------------------------------- +std::vector const sbn::SaveJobEnvironment::DefaultRepositories { + "sbncode", "sbndcode", "icaruscode" +}; + + +// ----------------------------------------------------------------------------- +sbn::SaveJobEnvironment::SaveJobEnvironment(Parameters const& params) + : fRepositoryNames{ params().Repositories() } + , fWriteToTFileService + { parseWriteToTFileService(params().WriteToTFileService()) } + , fInfoExtractor{ makeInfoExtractor() } +{ + + produces>(); + + if (fWriteToTFileService) { + mf::LogInfo{ "SaveJobEnvironment" } + << "Will also save information into TFileService output file."; + } + +} // sbn::SaveJobEnvironment::SaveJobEnvironment() + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::beginJob() { + + fJobInfo = fetchInformation(); + +} + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::readResults(art::Results const& results) { + + std::vector const infoHandles + = results.getMany>(); + + mf::LogDebug{ "SaveJobEnvironment" } + << "Found " << infoHandles.size() << " job information products in input."; + + for (art::Handle> const& infoHandle + : infoHandles) + { + if (!infoHandle.isValid()) continue; + std::copy + (infoHandle->begin(), infoHandle->end(), back_inserter(fInputInfo)); + } // for + +} // sbn::SaveJobEnvironment::readResults() + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::writeResults(art::Results& results) { + + 
mf::LogDebug{ "SaveJobEnvironment" } + << "Information saved into art/ROOT output file:\n\n" << fJobInfo; + + auto allInfo + = std::make_unique>(fInputInfo); + allInfo->push_back(fJobInfo); + + // one copy each call (`fInputInfo` is kept: `clear()` does nothing) + results.put(std::move(allInfo)); + +} + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::clear() { +} + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::endJob() { + + if (fWriteToTFileService) { + + writeInformationToTFile(art::ServiceHandle()->file()); + + } + +} // sbn::SaveJobEnvironment::endJob() + + +// ----------------------------------------------------------------------------- +sbn::JobEnvironmentInfo sbn::SaveJobEnvironment::fetchInformation() { + + return fInfoExtractor.extract(moduleDescription()); + +} + + +// ----------------------------------------------------------------------------- +void sbn::SaveJobEnvironment::writeInformationToTFile(TDirectory& outDir) const +{ + + // assuming copy constructor will be used + outDir.WriteObject(&fJobInfo, "EnvInfo"); + +} // sbn::SaveJobEnvironment::writeInformationToTFile() + + +// ----------------------------------------------------------------------------- +bool sbn::SaveJobEnvironment::parseWriteToTFileService + (std::optional writeToTFileService) +{ + // if it is requested that we don't write it to TFileService, stop it here: + if (!writeToTFileService.value_or(true)) return false; + + // so either there is no explicit request, or that request is to use it: + // at this point, we do need to know if TFileService is available + // and if it's there, we'll want to write into it + try { + art::ServiceHandle(); + return true; + } + catch(art::Exception& e) { + if (e.categoryCode() != art::errors::ServiceNotFound) + throw; // something else entirely is happening: propagate the exception + + // not available: do we need 
it? 
+ if (!writeToTFileService) return false; // no explicit request, so no + + // there is a request (and if we are here it was positive): complain! + throw art::Exception{ art::errors::Configuration, "", e } + << "SaveJobEnvironment explicitly requested saving into TFileService" + " output, but TFileService is not configured.\n"; + + } + +} // sbn::SaveJobEnvironment::parseWriteToTFileService() + + +// ----------------------------------------------------------------------------- +sbn::JobEnvironmentInfoExtractor +sbn::SaveJobEnvironment::makeInfoExtractor() const { + + JobEnvironmentInfoExtractor::Config config; + config.repositories = fRepositoryNames; + return JobEnvironmentInfoExtractor{ config }; + +} // sbn::SaveJobEnvironment::makeInfoExtractor() + + +// ----------------------------------------------------------------------------- +DEFINE_ART_RESULTS_PLUGIN(sbn::SaveJobEnvironment) + + +// ----------------------------------------------------------------------------- diff --git a/sbncode/Metadata/sbncodeRepositoryVersion_tool.cc b/sbncode/Metadata/sbncodeRepositoryVersion_tool.cc new file mode 100644 index 000000000..8941ac4e2 --- /dev/null +++ b/sbncode/Metadata/sbncodeRepositoryVersion_tool.cc @@ -0,0 +1,62 @@ +/** + * @file sbncode/Metadata/sbncodeRepositoryVersion_tool.cc + * @brief _art_ tool reporting the version of `sbncode`-related packages. 
+ * @author Gianluca Petrillo (petrillo@slac.stanford.edu) + * @date January 18, 2025 + * + */ + +// SBN libraries +#include "sbncode/Metadata/RepositoryVersionReporter.h" +#include "sbncode/Metadata/RepositoryVersion_sbncode.h" +#include "sbnobj/Metadata/RepositoryVersion_sbnobj.h" + +// framework libraries +#include "art/Utilities/ToolMacros.h" +#include "art/Utilities/ToolConfigTable.h" +#include "messagefacility/MessageLogger/MessageLogger.h" + +// C++ standard libraries +#include +#include + + +// ----------------------------------------------------------------------------- +namespace sbn { struct sbncodeRepositoryVersion; } +/** + * @brief Implements the `sbn::RepositoryVersionReporter` interface for + * `sbncode`. + * + * It collects information from the following repositories: `sbncode` and + * `sbnobj`. + * + */ +struct sbn::sbncodeRepositoryVersion: public sbn::RepositoryVersionReporter { + + struct Config {}; + + using Parameters = art::ToolConfigTable; + + sbncodeRepositoryVersion(Parameters const&); + +}; // sbn::sbncodeRepositoryVersion + + +// ----------------------------------------------------------------------------- +// --- implementation +// ----------------------------------------------------------------------------- +sbn::sbncodeRepositoryVersion::sbncodeRepositoryVersion(Parameters const&) { + + packageVersions.items.emplace_back("sbnobj", ::RepositoryVersion_sbnobj); + packageVersions.items.emplace_back("sbncode", ::RepositoryVersion_sbncode); + + packageVersions.finish(); + +} + + +// ----------------------------------------------------------------------------- +DEFINE_ART_CLASS_TOOL(sbn::sbncodeRepositoryVersion) + + +// -----------------------------------------------------------------------------