|
| 1 | +/* |
| 2 | + * Copyright 2023 Veloman Yunkan <veloman.yunkan@gmail.com> |
| 3 | + * |
| 4 | + * This program is free software; you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU General Public License as published by |
| 6 | + * the Free Software Foundation; either version 3 of the License, or |
| 7 | + * any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU General Public License |
| 15 | + * along with this program; if not, write to the Free Software |
| 16 | + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
| 17 | + * MA 02110-1301, USA. |
| 18 | + */ |
| 19 | + |
| 20 | +#include "metadata.h" |
| 21 | + |
| 22 | +#include <sstream> |
| 23 | +#include <regex> |
| 24 | +#include <unicode/unistr.h> |
| 25 | + |
| 26 | +#include <cctype> |
| 27 | +#include <iomanip> |
| 28 | + |
| 29 | + |
| 30 | +namespace zim |
| 31 | +{ |
| 32 | + |
| 33 | +namespace |
| 34 | +{ |
| 35 | + |
// Named values for the "is mandatory" flag of a reserved metadata record
// (presumably used by the table in metadata_constraints.cpp, which is not
// visible from here - TODO confirm).
const bool MANDATORY{true};
const bool OPTIONAL{false};
| 38 | + |
// LANGS_REGEXP: one or more groups of exactly three word characters,
//               separated by commas (no spaces).
// DATE_REGEXP:  four digits, a dash, two digits, a dash, two digits.
// Both are written as raw string literals so that backslashes need no
// doubling.
const std::string LANGS_REGEXP = R"(\w{3}(,\w{3})*)";
const std::string DATE_REGEXP  = R"(\d\d\d\d-\d\d-\d\d)";
| 41 | + |
// The PNG regexp contains an embedded NUL character, therefore it cannot be
// turned into a std::string via the const char* constructor (the text would
// be cut at the NUL). Instead the string is built from the full extent of
// the character array, minus the terminating NUL appended by the compiler.
const char PNG_REGEXP_DATA[] = "^\x89\x50\x4e\x47\x0d\x0a\x1a\x0a(.|\\s|\0)+";
const std::string PNG_REGEXP(std::begin(PNG_REGEXP_DATA),
                             std::end(PNG_REGEXP_DATA) - 1);
| 46 | + |
// Tells whether `text`, taken as a whole, matches the regular expression
// `regexStr` (interpreted with the default std::regex grammar).
//
// The regular expression is compiled anew on every call; std::regex throws
// std::regex_error if `regexStr` is not a valid pattern.
bool matchRegex(const std::string& regexStr, const std::string& text)
{
  return std::regex_match(text, std::regex(regexStr));
}
| 52 | + |
| 53 | +size_t getTextLength(const std::string& utf8EncodedString) |
| 54 | +{ |
| 55 | + return icu::UnicodeString::fromUTF8(utf8EncodedString).length(); |
| 56 | +} |
| 57 | + |
| 58 | +class MetadataComplexCheckBase |
| 59 | +{ |
| 60 | +public: |
| 61 | + const std::string description; |
| 62 | + const MetadataComplexCheckBase* const prev; |
| 63 | + |
| 64 | +public: // functions |
| 65 | + explicit MetadataComplexCheckBase(const std::string& desc); |
| 66 | + |
| 67 | + MetadataComplexCheckBase(const MetadataComplexCheckBase&) = delete; |
| 68 | + MetadataComplexCheckBase(MetadataComplexCheckBase&&) = delete; |
| 69 | + void operator=(const MetadataComplexCheckBase&) = delete; |
| 70 | + void operator=(MetadataComplexCheckBase&&) = delete; |
| 71 | + |
| 72 | + virtual ~MetadataComplexCheckBase(); |
| 73 | + |
| 74 | + virtual bool checkMetadata(const Metadata& m) const = 0; |
| 75 | + |
| 76 | + static const MetadataComplexCheckBase* getLastCheck() { return last; } |
| 77 | + |
| 78 | +private: // functions |
| 79 | + static const MetadataComplexCheckBase* last; |
| 80 | +}; |
| 81 | + |
| 82 | +const MetadataComplexCheckBase* MetadataComplexCheckBase::last = nullptr; |
| 83 | + |
| 84 | +MetadataComplexCheckBase::MetadataComplexCheckBase(const std::string& desc) |
| 85 | + : description(desc) |
| 86 | + , prev(last) |
| 87 | +{ |
| 88 | + last = this; |
| 89 | +} |
| 90 | + |
| 91 | +MetadataComplexCheckBase::~MetadataComplexCheckBase() |
| 92 | +{ |
| 93 | + // Ideally, we should de-register this object from the list of live objects. |
| 94 | + // However, in the current implementation MetadataComplexCheckBase objects |
| 95 | + // are only constructed in static storage and the list of active objects |
| 96 | + // isn't supposed to be accessed after any MetadataComplexCheckBase object |
| 97 | + // has been destroyed as part of program termination clean-up actions. |
| 98 | +} |
| 99 | + |
// Defines a new complex metadata check. The macro expands to
//
//   1. the definition of the class CHECK_CLASS derived from
//      MetadataComplexCheckBase and described by DESCRIPTION;
//   2. a constant object of that class named obj<CHECK_CLASS>, whose
//      construction registers the check;
//   3. the header of the out-of-class definition of
//      CHECK_CLASS::checkMetadata().
//
// Because of (3) the macro invocation must be immediately followed by the
// braced body of the check, in which the metadata under test is available
// as `data`.
#define ADD_METADATA_COMPLEX_CHECK(DESCRIPTION, CHECK_CLASS) \
class CHECK_CLASS : public MetadataComplexCheckBase \
{ \
public: \
  CHECK_CLASS() : MetadataComplexCheckBase(DESCRIPTION) {} \
  bool checkMetadata(const Metadata& data) const override; \
}; \
 \
const CHECK_CLASS CONCAT(obj, CHECK_CLASS); \
 \
bool CHECK_CLASS::checkMetadata(const Metadata& data) const \
/* should be followed by the check body */
| 112 | + |
| 113 | + |
| 114 | + |
// Pastes two tokens together. When CONCAT is invoked from within another
// macro with that macro's parameter as an argument (as GENCLSNAME does), the
// argument is fully expanded before pasting, so that e.g. __LINE__ is
// replaced with the line number first.
#define CONCAT(LHS, RHS) LHS##RHS

// Generates the name of a check class from a unique suffix
#define GENCLSNAME(SUFFIX) CONCAT(MetadataComplexCheck, SUFFIX)

// Starts the definition of a complex check described by DESC; the name of
// the underlying class is derived from the current line number, therefore
// at most one METADATA_ASSERT may appear on a line.
#define METADATA_ASSERT(DESC) ADD_METADATA_COMPLEX_CHECK(DESC, GENCLSNAME(__LINE__))
| 119 | + |
| 120 | + |
| 121 | +#include "metadata_constraints.cpp" |
| 122 | + |
// Returns a copy of `s` in which every non-printable character (as reported
// by std::isprint() for the current C locale) is replaced with its \xHH
// escape sequence (two lower-case hexadecimal digits).
//
// This function is intended for pretty printing of regexps with non-printable
// characters.
// In a general purpose/rigorous version we should escape the escape symbol
// (backslash) too, but that doesn't play well with the purpose stated above.
std::string escapeNonPrintableChars(const std::string& s)
{
  std::ostringstream os;
  os << std::hex << std::setfill('0');
  for (const char c : s) {
    // std::isprint() has undefined behaviour for negative arguments other
    // than EOF, thus a (possibly signed) char must be converted to
    // unsigned char before being passed to it.
    const unsigned char uc = static_cast<unsigned char>(c);
    if (std::isprint(uc)) {
      os << c;
    } else {
      // The field width is reset after every formatted output, so it has to
      // be set anew for each escaped character.
      os << "\\x" << std::setw(2) << static_cast<unsigned int>(uc);
    }
  }
  return os.str();
}
| 141 | + |
| 142 | +Metadata::Errors concat(Metadata::Errors e1, const Metadata::Errors& e2) |
| 143 | +{ |
| 144 | + e1.insert(e1.end(), e2.begin(), e2.end()); |
| 145 | + return e1; |
| 146 | +} |
| 147 | + |
| 148 | +} // unnamed namespace |
| 149 | + |
| 150 | +const Metadata::ReservedMetadataTable& Metadata::reservedMetadataInfo = reservedMetadataInfoTable; |
| 151 | + |
| 152 | +const Metadata::ReservedMetadataRecord& |
| 153 | +Metadata::getReservedMetadataRecord(const std::string& name) |
| 154 | +{ |
| 155 | + for ( const auto& x : reservedMetadataInfo ) { |
| 156 | + if ( x.name == name ) |
| 157 | + return x; |
| 158 | + } |
| 159 | + |
| 160 | + throw std::out_of_range(name + " is not a reserved metadata name"); |
| 161 | +} |
| 162 | + |
| 163 | +bool Metadata::has(const std::string& name) const |
| 164 | +{ |
| 165 | + return data.find(name) != data.end(); |
| 166 | +} |
| 167 | + |
| 168 | +const std::string& Metadata::operator[](const std::string& name) const |
| 169 | +{ |
| 170 | + return data.at(name); |
| 171 | +} |
| 172 | + |
| 173 | +void Metadata::set(const std::string& name, const std::string& value) |
| 174 | +{ |
| 175 | + data[name] = value; |
| 176 | +} |
| 177 | + |
| 178 | +bool Metadata::valid() const |
| 179 | +{ |
| 180 | + return check().empty(); |
| 181 | +} |
| 182 | + |
| 183 | +Metadata::Errors Metadata::checkMandatoryMetadata() const |
| 184 | +{ |
| 185 | + Errors errors; |
| 186 | + for ( const auto& rmr : reservedMetadataInfo ) { |
| 187 | + if ( rmr.isMandatory && data.find(rmr.name) == data.end() ) { |
| 188 | + errors.push_back("Missing mandatory metadata: " + rmr.name ); |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + return errors; |
| 193 | +} |
| 194 | + |
| 195 | +Metadata::Errors Metadata::checkSimpleConstraints() const |
| 196 | +{ |
| 197 | + Errors errors; |
| 198 | + for ( const auto& nv : data ) { |
| 199 | + const auto& name = nv.first; |
| 200 | + const auto& value = nv.second; |
| 201 | + try { |
| 202 | + const auto& rmr = getReservedMetadataRecord(name); |
| 203 | + if ( rmr.minLength != 0 && getTextLength(value) < rmr.minLength ) { |
| 204 | + std::ostringstream oss; |
| 205 | + oss << name << " must contain at least " << rmr.minLength << " characters"; |
| 206 | + errors.push_back(oss.str()); |
| 207 | + } |
| 208 | + if ( rmr.maxLength != 0 && getTextLength(value) > rmr.maxLength ) { |
| 209 | + std::ostringstream oss; |
| 210 | + oss << name << " must contain at most " << rmr.maxLength << " characters"; |
| 211 | + errors.push_back(oss.str()); |
| 212 | + } |
| 213 | + if ( !rmr.regex.empty() && !matchRegex(rmr.regex, value) ) { |
| 214 | + const std::string regex = escapeNonPrintableChars(rmr.regex); |
| 215 | + errors.push_back(name + " doesn't match regex: " + regex); |
| 216 | + } |
| 217 | + } catch ( const std::out_of_range& ) { |
| 218 | + // ignore non-reserved metadata |
| 219 | + } |
| 220 | + } |
| 221 | + return errors; |
| 222 | +} |
| 223 | + |
| 224 | +Metadata::Errors Metadata::checkComplexConstraints() const |
| 225 | +{ |
| 226 | + Errors errors; |
| 227 | + const MetadataComplexCheckBase* c = MetadataComplexCheckBase::getLastCheck(); |
| 228 | + for ( ; c != nullptr ; c = c->prev ) { |
| 229 | + if ( ! c->checkMetadata(*this) ) { |
| 230 | + errors.push_back(c->description); |
| 231 | + } |
| 232 | + } |
| 233 | + return errors; |
| 234 | +} |
| 235 | + |
| 236 | +Metadata::Errors Metadata::check() const |
| 237 | +{ |
| 238 | + const Errors e1 = checkMandatoryMetadata(); |
| 239 | + const Errors e2 = checkSimpleConstraints(); |
| 240 | + if ( !e1.empty() || !e2.empty() ) |
| 241 | + return concat(e1, e2); |
| 242 | + |
| 243 | + return checkComplexConstraints(); |
| 244 | +} |
| 245 | + |
| 246 | +} // namespace zim |
0 commit comments