From a73af34dbe55e294a9a623d31705c2542248d5e5 Mon Sep 17 00:00:00 2001 From: Tobias Bocanegra Date: Mon, 7 Apr 2025 17:37:22 +0200 Subject: [PATCH 1/2] feat: read metadata from source --- package-lock.json | 4 +- src/steps/fetch-sourced-metadata.js | 70 +++++++++++++++++++++++++++++ src/utils/modifiers.js | 8 ++++ test/modifiers.test.js | 9 ++++ 4 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 src/steps/fetch-sourced-metadata.js diff --git a/package-lock.json b/package-lock.json index 688f1757..43eca6c4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@adobe/helix-html-pipeline", - "version": "6.21.6", + "version": "6.24.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@adobe/helix-html-pipeline", - "version": "6.21.6", + "version": "6.24.2", "license": "Apache-2.0", "dependencies": { "@adobe/helix-markdown-support": "7.1.12", diff --git a/src/steps/fetch-sourced-metadata.js b/src/steps/fetch-sourced-metadata.js new file mode 100644 index 00000000..ff4ff36a --- /dev/null +++ b/src/steps/fetch-sourced-metadata.js @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { PipelineStatusError } from '../PipelineStatusError.js'; +import { Modifiers } from '../utils/modifiers.js'; + +/** + * Loads metadata from the metadata sources if required. this happens when the amount of metadata + * was too large to include in the config response. the metadata is loaded if + * `state.metadata` is empty and `config.metadata.source` is not. + * + * @type PipelineStep + * @param {PipelineState} state + * @param {PipelineResponse} res + * @returns {Promise} + */ +// eslint-disable-next-line no-unused-vars +export default async function fetchSourcedMetadata(state, res) { + if (!state.metadata.isEmpty()) { + return; + } + const sources = state.config.metadata?.source || []; + if (sources.length === 0) { + return; + } + + const { contentBusId, partition } = state; + const metadata = []; + await Promise.all(sources.map(async (src) => { + const key = `${contentBusId}/${partition}/${src}`; + + })); + const metadataPath = `${mappedPath}/metadata.json`; + const ret = await state.s3Loader.getObject('helix-content-bus', key); + if (ret.status === 200) { + let json; + try { + json = JSON.parse(ret.body); + } catch (e) { + throw new PipelineStatusError(500, `failed parsing of ${metadataPath}: ${e.message}`); + } + const { data } = json.default ?? json; + if (!data) { + state.log.info(`default sheet missing in ${metadataPath}`); + return; + } + + if (!Array.isArray(data)) { + throw new PipelineStatusError(500, `failed loading of ${metadataPath}: data must be an array`); + } + + state.mappedMetadata = Modifiers.fromModifierSheet( + data, + ); + // note, that the folder mapped metadata does not influence the last-modified calculation. + return; + } + if (ret.status !== 404) { + throw new PipelineStatusError(502, `failed to load ${metadataPath}: ${ret.status}`); + } +} diff --git a/src/utils/modifiers.js b/src/utils/modifiers.js index 66edb3d9..c5364f77 100644 --- a/src/utils/modifiers.js +++ b/src/utils/modifiers.js @@ -124,6 +124,14 @@ export class Modifiers { return new Modifiers(res); } + /** + * Returns true if there are no modifiers. + * @returns {boolean} + */ + isEmpty() { + return this.modifiers.length === 0; + } + constructor(config) { this.modifiers = Object.entries(config).map(([url, mods]) => { const pat = url.indexOf('*') >= 0 ? globToRegExp(url) : url; diff --git a/test/modifiers.test.js b/test/modifiers.test.js index 38772bac..f663f25f 100644 --- a/test/modifiers.test.js +++ b/test/modifiers.test.js @@ -145,4 +145,13 @@ describe('Metadata', () => { const actual = Modifiers.fromModifierSheet(data).getModifiers('/nope'); assert.deepEqual(actual, {}); }); + + it('isEmpty returns true, if empty', async () => { + assert.strictEqual(new Modifiers({}).isEmpty(), true); + }); + + it('isEmpty returns false, if not empty', async () => { + const { default: { data } } = await readTestJSON('metadata.json'); + assert.strictEqual(Modifiers.fromModifierSheet(data).isEmpty(), false); + }); }); From 98c724e178019535f7397d28697b1a0d13a27642 Mon Sep 17 00:00:00 2001 From: Tobias Bocanegra Date: Tue, 8 Apr 2025 14:36:27 +0200 Subject: [PATCH 2/2] feat: load sourced metadata if required --- src/html-pipe.js | 2 + src/steps/fetch-sourced-metadata.js | 57 ++++----- test/FileS3Loader.js | 17 ++- test/html-pipe.test.js | 175 ++++++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 33 deletions(-) diff --git a/src/html-pipe.js b/src/html-pipe.js index e5221e2b..fe97a473 100644 --- a/src/html-pipe.js +++ b/src/html-pipe.js @@ -36,6 +36,7 @@ import { PipelineResponse } from './PipelineResponse.js'; import { validatePathInfo } from './utils/path.js'; import fetchMappedMetadata from './steps/fetch-mapped-metadata.js'; import { applyMetaLastModified, setLastModified } from './utils/last-modified.js'; +import fetchSourcedMetadata from './steps/fetch-sourced-metadata.js'; /** * Fetches the content and if not found, fetches the 404.html @@ -132,6 +133,7 @@ export async function htmlPipe(state, req) { state.timer?.update('metadata-fetch'); await Promise.all([ contentPromise, + fetchSourcedMetadata(state, res), fetchMappedMetadata(state, res), ]); diff --git a/src/steps/fetch-sourced-metadata.js b/src/steps/fetch-sourced-metadata.js index ff4ff36a..42d406d8 100644 --- a/src/steps/fetch-sourced-metadata.js +++ b/src/steps/fetch-sourced-metadata.js @@ -12,6 +12,7 @@ import { PipelineStatusError } from '../PipelineStatusError.js'; import { Modifiers } from '../utils/modifiers.js'; +import { extractLastModified, recordLastModified } from '../utils/last-modified.js'; /** * Loads metadata from the metadata sources if required. this happens when the amount of metadata @@ -34,37 +35,37 @@ export default async function fetchSourcedMetadata(state, res) { } const { contentBusId, partition } = state; - const metadata = []; + const metadatas = {}; await Promise.all(sources.map(async (src) => { + metadatas[src] = []; const key = `${contentBusId}/${partition}/${src}`; + const ret = await state.s3Loader.getObject('helix-content-bus', key); + if (ret.status === 200) { + let json; + try { + json = JSON.parse(ret.body); + } catch (e) { + throw new PipelineStatusError(500, `failed parsing of ${key}: ${e.message}`); + } + const { data } = json.default ?? json; + if (!data) { + state.log.info(`default sheet missing in ${key}`); + return; + } - })); - const metadataPath = `${mappedPath}/metadata.json`; - const ret = await state.s3Loader.getObject('helix-content-bus', key); - if (ret.status === 200) { - let json; - try { - json = JSON.parse(ret.body); - } catch (e) { - throw new PipelineStatusError(500, `failed parsing of ${metadataPath}: ${e.message}`); - } - const { data } = json.default ?? json; - if (!data) { - state.log.info(`default sheet missing in ${metadataPath}`); - return; - } - - if (!Array.isArray(data)) { - throw new PipelineStatusError(500, `failed loading of ${metadataPath}: data must be an array`); + if (!Array.isArray(data)) { + throw new PipelineStatusError(500, `failed loading of ${key}: data must be an array`); + } + metadatas[src] = data; + recordLastModified(state, res, 'content', extractLastModified(ret.headers)); + } else if (ret.status !== 404) { + throw new PipelineStatusError(502, `failed to load ${key}: ${ret.status}`); } - - state.mappedMetadata = Modifiers.fromModifierSheet( - data, - ); - // note, that the folder mapped metadata does not influence the last-modified calculation. - return; - } - if (ret.status !== 404) { - throw new PipelineStatusError(502, `failed to load ${metadataPath}: ${ret.status}`); + })); + // aggregate the metadata in the same order as specified + const metadata = []; + for (const src of sources) { + metadata.push(...metadatas[src]); } + state.metadata = Modifiers.fromModifierSheet(metadata); } diff --git a/test/FileS3Loader.js b/test/FileS3Loader.js index af6d9c15..5e5d03c3 100644 --- a/test/FileS3Loader.js +++ b/test/FileS3Loader.js @@ -25,6 +25,7 @@ export class FileS3Loader { statusCodeOverrides: {}, rewrites: [], headerOverride: {}, + contentOverrides: {}, }); } @@ -38,6 +39,11 @@ export class FileS3Loader { return this; } + override(fileName, data) { + this.contentOverrides[fileName] = data; + return this; + } + headers(fileName, name, value) { let headers = this.headerOverride[fileName]; if (!headers) { @@ -57,20 +63,21 @@ export class FileS3Loader { fileName = this.rewrites.reduce((result, rewrite) => rewrite(key) || result, null) || fileName; const status = this.statusCodeOverrides[fileName]; + let body = this.contentOverrides[fileName] || ''; const headers = this.headerOverride[fileName] ?? new Map(); - if (status) { + if (status || body) { // eslint-disable-next-line no-console - console.log(`FileS3Loader: loading ${bucketId}/${fileName} -> ${status}`); + console.log(`FileS3Loader: loading ${bucketId}/${fileName} -> ${status || 200}`); return { - status, - body: '', + status: status || 200, + body, headers, }; } const file = path.resolve(dir, fileName); try { - const body = await readFile(file, 'utf-8'); + body = await readFile(file, 'utf-8'); // eslint-disable-next-line no-console console.log(`FileS3Loader: loading ${bucketId}/${fileName} -> 200`); return { diff --git a/test/html-pipe.test.js b/test/html-pipe.test.js index ee9ae4af..225a9e7e 100644 --- a/test/html-pipe.test.js +++ b/test/html-pipe.test.js @@ -198,6 +198,181 @@ describe('HTML Pipe Test', () => { }); }); + it('loads sourced metadata', async () => { + const s3Loader = new FileS3Loader(); + s3Loader.override('metadata.json', JSON.stringify( + { + data: [ + { URL: '/**', key: 'category', value: 'news' }, + { URL: '/**', key: 'template', value: 'page' }, + ], + }, + )); + s3Loader.override('metadata-seo.json', JSON.stringify( + { + data: [ + { URL: '/**', key: 'template', value: 'blog' }, + ], + }, + )); + const state = DEFAULT_STATE({ + ...DEFAULT_CONFIG, + metadata: { + source: [ + 'metadata.json', + 'metadata-seo.json', + 'metadata-missing.json', + ], + }, + }, { + log: console, + s3Loader, + ref: 'super-test', + partition: 'live', + path: '/articles', + timer: { + update: () => { }, + }, + }); + const resp = await htmlPipe( + state, + new PipelineRequest(new URL('https://www.hlx.live/')), + ); + assert.strictEqual(resp.status, 200); + assert.ok(resp.body.includes('')); + assert.ok(resp.body.includes('')); + assert.deepStrictEqual(Object.fromEntries(resp.headers.entries()), { + 'content-type': 'text/html; charset=utf-8', + 'last-modified': 'Fri, 30 Apr 2021 03:47:18 GMT', + 'x-surrogate-key': 'iQzO-EvK0WKNO_o0 foo-id_metadata super-test--helix-pages--adobe_head foo-id', + }); + }); + + it('rejects invalid sourced metadata (json error)', async () => { + const s3Loader = new FileS3Loader(); + s3Loader.override('metadata.json', 'kaputt'); + const state = DEFAULT_STATE({ + ...DEFAULT_CONFIG, + metadata: { + source: [ + 'metadata.json', + ], + }, + }, { + log: console, + s3Loader, + ref: 'super-test', + partition: 'live', + path: '/articles', + timer: { + update: () => { }, + }, + }); + const resp = await htmlPipe( + state, + new PipelineRequest(new URL('https://www.hlx.live/')), + ); + assert.strictEqual(resp.status, 500); + assert.deepStrictEqual(Object.fromEntries(resp.headers.entries()), { + 'content-type': 'text/html; charset=utf-8', + 'x-error': 'failed parsing of foo-id/live/metadata.json: Unexpected token \'k\', "kaputt" is not valid JSON', + }); + }); + + it('rejects invalid sourced metadata (invalid sheet)', async () => { + const s3Loader = new FileS3Loader(); + s3Loader.override('metadata.json', '{ "data": "foo" }'); + const state = DEFAULT_STATE({ + ...DEFAULT_CONFIG, + metadata: { + source: [ + 'metadata.json', + ], + }, + }, { + log: console, + s3Loader, + ref: 'super-test', + partition: 'live', + path: '/articles', + timer: { + update: () => { }, + }, + }); + const resp = await htmlPipe( + state, + new PipelineRequest(new URL('https://www.hlx.live/')), + ); + assert.strictEqual(resp.status, 500); + assert.deepStrictEqual(Object.fromEntries(resp.headers.entries()), { + 'content-type': 'text/html; charset=utf-8', + 'x-error': 'failed loading of foo-id/live/metadata.json: data must be an array', + }); + }); + + it('ignores invalid sourced metadata (missing sheet)', async () => { + const s3Loader = new FileS3Loader(); + s3Loader.override('metadata.json', '{}'); + const state = DEFAULT_STATE({ + ...DEFAULT_CONFIG, + metadata: { + source: [ + 'metadata.json', + ], + }, + }, { + log: console, + s3Loader, + ref: 'super-test', + partition: 'live', + path: '/articles', + timer: { + update: () => { }, + }, + }); + const resp = await htmlPipe( + state, + new PipelineRequest(new URL('https://www.hlx.live/')), + ); + assert.strictEqual(resp.status, 200); + assert.deepStrictEqual(Object.fromEntries(resp.headers.entries()), { + 'content-type': 'text/html; charset=utf-8', + 'last-modified': 'Fri, 30 Apr 2021 03:47:18 GMT', + 'x-surrogate-key': 'iQzO-EvK0WKNO_o0 foo-id_metadata super-test--helix-pages--adobe_head foo-id', + }); + }); + + it('rejects invalid sourced metadata (status code)', async () => { + const s3Loader = new FileS3Loader(); + s3Loader.status('metadata.json', 401); + const state = DEFAULT_STATE({ + ...DEFAULT_CONFIG, + metadata: { + source: [ + 'metadata.json', + ], + }, + }, { + log: console, + s3Loader, + ref: 'super-test', + partition: 'live', + path: '/articles', + timer: { + update: () => { }, + }, + }); + const resp = await htmlPipe( + state, + new PipelineRequest(new URL('https://www.hlx.live/')), + ); + assert.strictEqual(resp.status, 502); + assert.deepStrictEqual(Object.fromEntries(resp.headers.entries()), { + 'content-type': 'text/html; charset=utf-8', + 'x-error': 'failed to load foo-id/live/metadata.json: 401', + }); + }); + it('renders static html with selector my-block.selector.html', async () => { const s3Loader = new FileS3Loader(); const state = DEFAULT_STATE(DEFAULT_CONFIG, {