From 864293a13fced58c28b64c69494ddec4c83840f2 Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Fri, 20 Oct 2023 15:50:53 +0100 Subject: [PATCH 1/5] fix: support deeply nested triples in termFromId and termToId --- src/N3DataFactory.js | 55 ++++++++------- test/Term-test.js | 157 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 158 insertions(+), 54 deletions(-) diff --git a/src/N3DataFactory.js b/src/N3DataFactory.js index dec7aaef..4c253a7c 100644 --- a/src/N3DataFactory.js +++ b/src/N3DataFactory.js @@ -10,7 +10,6 @@ let DEFAULTGRAPH; let _blankNodeCounter = 0; const escapedLiteral = /^"(.*".*)(?="[^"]*$)/; -const quadId = /^<<("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ?("(?:""|[^"])*"[^ ]*|[^ ]+)?>>$/; // ## DataFactory singleton const DataFactory = { @@ -188,9 +187,12 @@ export class DefaultGraph extends Term { // ## DefaultGraph singleton DEFAULTGRAPH = new DefaultGraph(); - // ### Constructs a term from the given internal string ID -export function termFromId(id, factory) { +// The third 'nested' parameter of this function is to aid +// with recursion over nested terms. It should not be used +// by consumers of this library. +// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725 +export function termFromId(id, factory, nested) { factory = factory || DataFactory; // Falsy value or empty string indicate the default graph @@ -215,21 +217,28 @@ export function termFromId(id, factory) { return factory.literal(id.substr(1, endPos - 1), id[endPos + 1] === '@' ? id.substr(endPos + 2) : factory.namedNode(id.substr(endPos + 3))); - case '<': - const components = quadId.exec(id); - return factory.quad( - termFromId(unescapeQuotes(components[1]), factory), - termFromId(unescapeQuotes(components[2]), factory), - termFromId(unescapeQuotes(components[3]), factory), - components[4] && termFromId(unescapeQuotes(components[4]), factory) - ); + case '[': + id = JSON.parse(id); + break; default: - return factory.namedNode(id); + if (!nested || !Array.isArray(id)) { + return factory.namedNode(id); + } } + return factory.quad( + termFromId(id[0], factory, true), + termFromId(id[1], factory, true), + termFromId(id[2], factory, true), + id[3] && termFromId(id[3], factory, true) + ); } // ### Constructs an internal string ID from the given term or ID string -export function termToId(term) { +// The third 'nested' parameter of this function is to aid +// with recursion over nested terms. It should not be used +// by consumers of this library. +// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725 +export function termToId(term, nested) { if (typeof term === 'string') return term; if (term instanceof Term && term.termType !== 'Quad') @@ -247,17 +256,15 @@ export function termToId(term) { term.language ? `@${term.language}` : (term.datatype && term.datatype.value !== xsd.string ? `^^${term.datatype.value}` : '')}`; case 'Quad': - // To identify RDF* quad components, we escape quotes by doubling them. - // This avoids the overhead of backslash parsing of Turtle-like syntaxes. - return `<<${ - escapeQuotes(termToId(term.subject)) - } ${ - escapeQuotes(termToId(term.predicate)) - } ${ - escapeQuotes(termToId(term.object)) - }${ - (isDefaultGraph(term.graph)) ? '' : ` ${termToId(term.graph)}` - }>>`; + const res = [ + termToId(term.subject, true), + termToId(term.predicate, true), + termToId(term.object, true), + ]; + if (!isDefaultGraph(term.graph)) { + res.push(termToId(term.graph, true)); + } + return nested ? res : JSON.stringify(res); default: throw new Error(`Unexpected termType: ${term.termType}`); } } diff --git a/test/Term-test.js b/test/Term-test.js index fde0ece8..b5b5462e 100644 --- a/test/Term-test.js +++ b/test/Term-test.js @@ -1,3 +1,8 @@ +import { + termToId, + termFromId, +} from '../src/'; + import { Term, NamedNode, @@ -6,15 +11,57 @@ import { Variable, DefaultGraph, Quad, - termToId, - termFromId, -} from '../src/'; - -import { escapeQuotes, unescapeQuotes, } from '../src/N3DataFactory'; +const DEEP_TRIPLE = new Quad( + new Quad( + new Quad( + new Quad( + new BlankNode('n3-000'), + new Variable('var-b'), + new Literal('"abc"@en-us'), + new NamedNode('http://ex.org/d') + ), + new Variable('var-b'), + new Quad( + new BlankNode('n3-000'), + new Variable('var-b'), + new Literal('"abc"@en-us'), + new NamedNode('http://ex.org/d') + ), + new NamedNode('http://ex.org/d') + ), + new Variable('var-b'), + new Quad( + new BlankNode('n3-000'), + new Variable('var-b'), + new Literal('"abc"@en-us'), + new NamedNode('http://ex.org/d') + ), + new NamedNode('http://ex.org/d') + ), + new NamedNode('http://ex.org/b'), + new Quad( + new Quad( + new BlankNode('n3-000'), + new Variable('var-b'), + new Literal('"abc"@en-us'), + new NamedNode('http://ex.org/d') + ), + new Variable('var-b'), + new Quad( + new BlankNode('n3-000'), + new Variable('var-b'), + new Literal('"abc"@en-us'), + new NamedNode('http://ex.org/d') + ), + new NamedNode('http://ex.org/d') + ), + new NamedNode('http://ex.org/d') +); + describe('Term', () => { describe('The Term module', () => { it('should be a function', () => { @@ -92,19 +139,20 @@ describe('Term', () => { it( 'should create a Quad with the default graph if the id doesnt specify the graph', () => { - expect(termFromId('<>')).toEqual(new Quad( + const q = new Quad( new NamedNode('http://ex.org/a'), new NamedNode('http://ex.org/b'), new Literal('"abc"@en-us'), new DefaultGraph() - )); + ); + expect(q.equals(termFromId(termToId(q)))).toBe(true); } ); it( 'should create a Quad with the correct graph if the id specifies a graph', () => { - const id = '<>'; + const id = '["http://ex.org/a", "http://ex.org/b", "\\"abc\\"@en-us", "http://ex.org/d"]'; expect(termFromId(id)).toEqual(new Quad( new NamedNode('http://ex.org/a'), new NamedNode('http://ex.org/b'), @@ -115,7 +163,7 @@ describe('Term', () => { ); it('should create a Quad correctly', () => { - const id = '<>'; + const id = '["http://ex.org/a", "http://ex.org/b", "http://ex.org/c"]'; expect(termFromId(id)).toEqual(new Quad( new NamedNode('http://ex.org/a'), new NamedNode('http://ex.org/b'), @@ -125,7 +173,7 @@ describe('Term', () => { }); it('should create a Quad correctly', () => { - const id = '<<_:n3-123 ?var-a ?var-b _:n3-000>>'; + const id = '["_:n3-123", "?var-a", "?var-b", "_:n3-000"]'; expect(termFromId(id)).toEqual(new Quad( new BlankNode('n3-123'), new Variable('var-a'), @@ -135,7 +183,7 @@ describe('Term', () => { }); it('should create a Quad correctly', () => { - const id = '<>'; + const id = '["?var-a", "?var-b", "\\"abc\\"@en-us", "?var-d"]'; expect(termFromId(id)).toEqual(new Quad( new Variable('var-a'), new Variable('var-b'), @@ -145,7 +193,7 @@ describe('Term', () => { }); it('should create a Quad correctly', () => { - const id = '<<_:n3-000 ?var-b _:n3-123 http://ex.org/d>>'; + const id = '["_:n3-000", "?var-b", "_:n3-123", "http://ex.org/d"]'; expect(termFromId(id)).toEqual(new Quad( new BlankNode('n3-000'), new Variable('var-b'), @@ -157,7 +205,7 @@ describe('Term', () => { it( 'should create a Quad correctly from literal containing escaped quotes', () => { - const id = '<<_:n3-000 ?var-b "Hello ""W""orl""d!"@en-us http://ex.org/d>>'; + const id = '["_:n3-000", "?var-b", "\\"Hello \\"W\\"orl\\"d!\\"@en-us", "http://ex.org/d"]'; expect(termFromId(id)).toEqual(new Quad( new BlankNode('n3-000'), new Variable('var-b'), @@ -170,16 +218,22 @@ describe('Term', () => { it( 'should create a Quad correctly from literal containing escaped quotes', () => { - const id = '<<"Hello ""W""orl""d!"@en-us http://ex.org/b http://ex.org/c>>'; - expect(termFromId(id)).toEqual(new Quad( + const q = new Quad( new Literal('"Hello "W"orl"d!"@en-us'), new NamedNode('http://ex.org/b'), new NamedNode('http://ex.org/c'), new DefaultGraph() - )); + ); + + expect(termFromId(termToId(q))).toEqual(q); } ); + it('should correctly handle deeply nested quads', () => { + expect(DEEP_TRIPLE.equals(termFromId(termToId(DEEP_TRIPLE)))).toBe(true); + expect(termFromId(termToId(DEEP_TRIPLE)).equals(DEEP_TRIPLE)).toBe(true); + }); + describe('with a custom factory', () => { const factory = { defaultGraph: function () { return ['d']; }, @@ -346,7 +400,7 @@ describe('Term', () => { new NamedNode('http://ex.org/b'), new Literal('"abc"@en-us'), new DefaultGraph() - ))).toBe('<>'); + ))).toBe('["http://ex.org/a","http://ex.org/b","\\"abc\\"@en-us"]'); }); it('should create an id from a Quad', () => { @@ -355,7 +409,9 @@ describe('Term', () => { new NamedNode('http://ex.org/b'), new Literal('"abc"@en-us'), new NamedNode('http://ex.org/d') - ))).toBe('<>'); + ))).toBe( + '["http://ex.org/a","http://ex.org/b","\\"abc\\"@en-us","http://ex.org/d"]' + ); }); it('should create an id from a manually created Quad', () => { @@ -366,7 +422,9 @@ describe('Term', () => { graph: new NamedNode('http://ex.org/d'), termType: 'Quad', value: '', - })).toBe('<>'); + })).toBe( + '["http://ex.org/a","http://ex.org/b","\\"abc\\"@en-us","http://ex.org/d"]' + ); }); it('should create an id with escaped literals from a Quad', () => { @@ -375,7 +433,9 @@ describe('Term', () => { new Variable('var-b'), new Literal('"Hello "W"orl"d!"@en-us'), new NamedNode('http://ex.org/d') - ))).toBe('<<_:n3-000 ?var-b "Hello ""W""orl""d!"@en-us http://ex.org/d>>'); + ))).toBe( + '["_:n3-000","?var-b","\\"Hello \\"W\\"orl\\"d!\\"@en-us","http://ex.org/d"]' + ); }); it( @@ -392,7 +452,7 @@ describe('Term', () => { new Literal('"abc"@en-us'), new DefaultGraph() ))).toBe( - '<<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/b "abc"@en-us>>' + '[["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/b","\\"abc\\"@en-us"]' ); } ); @@ -411,7 +471,7 @@ describe('Term', () => { ), new DefaultGraph() ))).toBe( - '<<"abc"@en-us http://ex.org/b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>>>>' + '["\\"abc\\"@en-us","http://ex.org/b",["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"]]' ); } ); @@ -435,7 +495,7 @@ describe('Term', () => { ), new DefaultGraph() ))).toBe( - '<<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>>>>' + '[["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/b",["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"]]' ); } ); @@ -454,7 +514,7 @@ describe('Term', () => { new Literal('"abc"@en-us'), new NamedNode('http://ex.org/d') ))).toBe( - '<<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/b "abc"@en-us http://ex.org/d>>' + '[["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/b","\\"abc\\"@en-us","http://ex.org/d"]' ); } ); @@ -473,7 +533,7 @@ describe('Term', () => { ), new NamedNode('http://ex.org/d') ))).toBe( - '<<"abc"@en-us http://ex.org/b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/d>>' + '["\\"abc\\"@en-us","http://ex.org/b",["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/d"]' ); } ); @@ -495,7 +555,7 @@ describe('Term', () => { ), new NamedNode('http://ex.org/d') ))).toBe( - '<<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/d>>' + '[["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/b",["_:n3-000","?var-b","\\"abc\\"@en-us","http://ex.org/d"],"http://ex.org/d"]' ); }); @@ -516,12 +576,49 @@ describe('Term', () => { ), new DefaultGraph() ))).toBe( - '<<<<_:n3-000 ?var-b "Hello ""W""orl""d!"@en-us http://ex.org/d>> http://ex.org/b <<_:n3-000 ?var-b "Hello ""W""orl""d!"@en-us http://ex.org/d>>>>' + '[["_:n3-000","?var-b","\\"Hello \\"W\\"orl\\"d!\\"@en-us","http://ex.org/d"],"http://ex.org/b",["_:n3-000","?var-b","\\"Hello \\"W\\"orl\\"d!\\"@en-us","http://ex.org/d"]]' ); }); + it( + 'should termToId <-> termFromId should roundtrip on deeply nested quad', + () => { + const q = new Quad( + new Quad( + new NamedNode('http://example.org/s1'), + new NamedNode('http://example.org/p1'), + new NamedNode('http://example.org/o1') + ), + new NamedNode('http://example.org/p1'), + new Quad( + new Quad( + new Literal('"s1"'), + new NamedNode('http://example.org/p1'), + new BlankNode('o1') + ), + new NamedNode('p2'), + new Quad( + new Quad( + new Literal('"s1"'), + new NamedNode('http://example.org/p1'), + new BlankNode('o1') + ), + new NamedNode('http://example.org/p1'), + new NamedNode('http://example.org/o1') + ) + ) + ); + + expect(q).toEqual(termFromId(termToId(q))); + expect(termFromId(termToId(q))).toEqual(q); + expect(q.equals(termFromId(termToId(q)))).toBe(true); + expect(termFromId(termToId(q)).equals(q)).toBe(true); + expect(termFromId(termToId(q)).equals(termFromId(termToId(q)))).toBe(true); + } + ); + it('should correctly handle deeply nested quads', () => { - expect(termToId(new Quad( + const q = new Quad( new Quad( new Quad( new Quad( @@ -566,9 +663,9 @@ describe('Term', () => { new NamedNode('http://ex.org/d') ), new NamedNode('http://ex.org/d') - ))).toBe( - '<<<<<<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> ?var-b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/d>> ?var-b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/d>> http://ex.org/b <<<<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> ?var-b <<_:n3-000 ?var-b "abc"@en-us http://ex.org/d>> http://ex.org/d>> http://ex.org/d>>' ); + + expect(q.equals(termFromId(termToId(q)))).toBe(true); }); it('should throw on an unknown type', () => { From 27e34f1581dbd5b6b2800df2af682d89d1055a2b Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Fri, 20 Oct 2023 15:58:24 +0100 Subject: [PATCH 2/5] chore: extend dataset tests for deeply nested triples --- test/N3Store-test.js | 143 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 125 insertions(+), 18 deletions(-) diff --git a/test/N3Store-test.js b/test/N3Store-test.js index 2a50894e..3c5ab9d0 100644 --- a/test/N3Store-test.js +++ b/test/N3Store-test.js @@ -1,11 +1,13 @@ import { Store, + termFromId, termToId, +} from '../src/'; +import { NamedNode, Literal, DefaultGraph, Quad, - termFromId, termToId, -} from '../src/'; +} from '../src/N3DataFactory'; import namespaces from '../src/IRIs'; import { Readable } from 'readable-stream'; import arrayifyStream from 'arrayify-stream'; @@ -299,43 +301,119 @@ describe('Store', () => { }); }); - describe('removing matching quads for RDF*', () => { + describe('removing matching quads for RDF-star', () => { let store; - beforeEach(() => { - store = new Store([ - new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p2'), new NamedNode('o1')), - new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p1'), new NamedNode('o1')), - new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p2'), new NamedNode('o2')), - new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p1'), new NamedNode('o2')), - new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2')), - ]); + const allQuads = [ + new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p2'), new NamedNode('o1')), + new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p1'), new NamedNode('o1')), + new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p2'), new NamedNode('o2')), + new Quad(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), new NamedNode('p1'), new NamedNode('o2')), + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2')), + ]; + beforeAll(() => { + store = new Store(allQuads); + }); + + it('should start with the correct size', () => { + expect(store.size).toEqual(5); }); it( 'should return the removed quads', - forResultStream(shouldIncludeAll, () => { return store.removeMatches(null, 'p2', 'o2'); }, - [termToId(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1'))), 'p2', 'o2']) + () => arrayifyStream(store.removeMatches(null, 'p2', 'o2')).then(quads => { + expect(quads.length).toBe(1); + expect(quads[0].equals( + new Quad( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')), + new NamedNode('p2'), + new NamedNode('o2') + ) + )).toBe(true); + }) ); it('should decrease the size', () => { - expect(store.size).toEqual(5); + expect(store.size).toEqual(4); }); - it('should match RDF* and normal quads at the same time', done => { + it('should match RDF-star and normal quads at the same time', done => { const stream = store.removeMatches(null, 'p1', 'o2'); stream.on('end', () => { - expect(store.size).toEqual(3); + expect(store.size).toEqual(2); done(); }); }); it('should allow matching using a quad', done => { - const stream = store.removeMatches(termToId(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1')))); + const stream = store.removeMatches(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1'))); stream.on('end', () => { - expect(store.size).toEqual(1); + expect(store.size).toEqual(0); done(); }); }); + + it( + 'should allow matching using a quad and only match against relevant quads', + done => { + const s2 = new Store([ + ...allQuads, + new Quad( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2')), + new NamedNode('p1'), + new NamedNode('o2')), + ]); + + const stream = s2.removeMatches(new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o1'))); + stream.on('end', () => { + expect(s2.size).toEqual(2); + done(); + }); + } + ); + }); + + // These tests should probably be broken in the future; they are here to serve to use that we should to a mver bump + // at such a time + describe('A store with quoted quads', () => { + let store; + beforeEach(() => { + store = new Store([ + new Quad( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2')), + new NamedNode('p1'), + new NamedNode('o2')), + new Quad( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2'), new NamedNode('g')), + new NamedNode('p1'), + new NamedNode('o2')), + ]); + }); + + it('should have the correct size', () => { + expect(store.size).toBe(2); + }); + + it('should get all quads with shared predicate', () => { + expect(store.getQuads(null, new NamedNode('p1'), null).length).toBe(2); + }); + + it('should get all quads with shared predicate 2', () => { + expect(store.getQuads( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2')), + new NamedNode('p1'), + null + ).length).toBe(1); + expect(store.getQuads( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2'), new NamedNode('g')), + new NamedNode('p1'), + null + ).length).toBe(1); + expect(store.getQuads( + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o2'), new NamedNode('g2')), + new NamedNode('p1'), + null + ).length).toBe(0); + }); }); describe('A Store with 7 elements', () => { @@ -1892,6 +1970,35 @@ describe('Store', () => { } ); + it( + 'should include added elements in match if iteration has not yet started (deeply nested)', + () => { + const m = store.match(null, null, null, null); + store.add(new Quad( + new NamedNode('s1'), + new NamedNode('p1'), + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o3')) + ) + ); + store.add(new Quad( + new NamedNode('s1'), + new NamedNode('p1'), + new Quad( + new NamedNode('s1'), + new NamedNode('p1'), + new Quad( + new NamedNode('s1'), + new NamedNode('p1'), + new NamedNode('o3') + ) + ) + ) + ); + expect([...m]).toHaveLength(4); + expect([...store.match(null, null, null, null)]).toHaveLength(4); + } + ); + it( 'should still include results of original match after iterating while adding new data', () => { From 894b06dc9b20370921a06fbd1cd07675702b4628 Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:03:45 +0100 Subject: [PATCH 3/5] perf: index terms of quoted triples --- src/N3Store.js | 117 ++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 49 deletions(-) diff --git a/src/N3Store.js b/src/N3Store.js index fb404b91..aac1c4c7 100644 --- a/src/N3Store.js +++ b/src/N3Store.js @@ -2,6 +2,7 @@ import { default as N3DataFactory, termToId, termFromId } from './N3DataFactory'; import { Readable } from 'readable-stream'; import namespaces from './IRIs'; +import { isDefaultGraph } from './N3Util'; // ## Constructor export default class N3Store { @@ -14,7 +15,6 @@ export default class N3Store { // saving memory by using only numbers as keys in `_graphs` this._id = 0; this._ids = Object.create(null); - this._ids['><'] = 0; // dummy entry, so the first actual key is non-zero this._entities = Object.create(null); // inverse of `_ids` // `_blankNodeIndex` is the index of the last automatically named blank node this._blankNodeIndex = 0; @@ -30,6 +30,45 @@ export default class N3Store { this.addQuads(quads); } + _termFromId(id, factory) { + if (id[0] === '.') { + const entities = this._entities; + const terms = id.split('.'); + const q = this._factory.quad( + this._termFromId(entities[terms[1]]), + this._termFromId(entities[terms[2]]), + this._termFromId(entities[terms[3]]), + terms[4] && this._termFromId(entities[terms[4]]) + ); + return q; + } + return termFromId(id, factory); + } + + _termToNumericId(term) { + if (term.termType === 'Quad') { + const s = this._termToNumericId(term.subject), + p = this._termToNumericId(term.predicate), + o = this._termToNumericId(term.object); + let g; + + return s && p && o && (isDefaultGraph(term.graph) || (g = this._termToNumericId(term.graph))) && + this._ids[g ? `.${s}.${p}.${o}.${g}` : `.${s}.${p}.${o}`]; + } + return this._ids[termToId(term)]; + } + + _termToNewNumericId(term) { + // This assumes that no graph term is present - we may wish to error if there is one + const str = term && term.termType === 'Quad' ? + `.${this._termToNewNumericId(term.subject)}.${this._termToNewNumericId(term.predicate)}.${this._termToNewNumericId(term.object)}${ + isDefaultGraph(term.graph) ? '' : `.${this._termToNewNumericId(term.graph)}` + }` + : termToId(term); + + return this._ids[str] || (this._ids[this._entities[++this._id] = str] = this._id); + } + // ## Public properties // ### `size` returns the number of quads in the store @@ -88,24 +127,24 @@ export default class N3Store { *_findInIndex(index0, key0, key1, key2, name0, name1, name2, graphId) { let tmp, index1, index2; const entityKeys = this._entities; - const graph = termFromId(graphId, this._factory); + const graph = this._termFromId(graphId, this._factory); const parts = { subject: null, predicate: null, object: null }; // If a key is specified, use only that part of index 0. if (key0) (tmp = index0, index0 = {})[key0] = tmp[key0]; for (const value0 in index0) { if (index1 = index0[value0]) { - parts[name0] = termFromId(entityKeys[value0], this._factory); + parts[name0] = this._termFromId(entityKeys[value0], this._factory); // If a key is specified, use only that part of index 1. if (key1) (tmp = index1, index1 = {})[key1] = tmp[key1]; for (const value1 in index1) { if (index2 = index1[value1]) { - parts[name1] = termFromId(entityKeys[value1], this._factory); + parts[name1] = this._termFromId(entityKeys[value1], this._factory); // If a key is specified, use only that part of index 2, if it exists. const values = key2 ? (key2 in index2 ? [key2] : []) : Object.keys(index2); // Create quads for all items found in index 2. for (let l = 0; l < values.length; l++) { - parts[name2] = termFromId(entityKeys[values[l]], this._factory); + parts[name2] = this._termFromId(entityKeys[values[l]], this._factory); yield this._factory.quad(parts.subject, parts.predicate, parts.object, graph); } } @@ -190,7 +229,7 @@ export default class N3Store { return id => { if (!(id in uniqueIds)) { uniqueIds[id] = true; - callback(termFromId(this._entities[id], this._factory)); + callback(this._termFromId(this._entities[id], this._factory)); } }; } @@ -214,9 +253,6 @@ export default class N3Store { predicate = subject.predicate, subject = subject.subject; // Convert terms to internal string representation - subject = termToId(subject); - predicate = termToId(predicate); - object = termToId(object); graph = termToId(graph); // Find the graph that will contain the triple @@ -232,11 +268,9 @@ export default class N3Store { // Since entities can often be long IRIs, we avoid storing them in every index. // Instead, we have a separate index that maps entities to numbers, // which are then used as keys in the other indexes. - const ids = this._ids; - const entities = this._entities; - subject = ids[subject] || (ids[entities[++this._id] = subject] = this._id); - predicate = ids[predicate] || (ids[entities[++this._id] = predicate] = this._id); - object = ids[object] || (ids[entities[++this._id] = object] = this._id); + subject = this._termToNewNumericId(subject); + predicate = this._termToNewNumericId(predicate); + object = this._termToNewNumericId(object); const changed = this._addToIndex(graphItem.subjects, subject, predicate, object); this._addToIndex(graphItem.predicates, predicate, object, subject); @@ -281,17 +315,14 @@ export default class N3Store { predicate = subject.predicate, subject = subject.subject; // Convert terms to internal string representation - subject = termToId(subject); - predicate = termToId(predicate); - object = termToId(object); graph = termToId(graph); // Find internal identifiers for all components // and verify the quad exists. - const ids = this._ids, graphs = this._graphs; + const graphs = this._graphs; let graphItem, subjects, predicates; - if (!(subject = ids[subject]) || !(predicate = ids[predicate]) || - !(object = ids[object]) || !(graphItem = graphs[graph]) || + if (!(subject = subject && this._termToNumericId(subject)) || !(predicate = predicate && this._termToNumericId(predicate)) || + !(object = object && this._termToNumericId(object)) || !(graphItem = graphs[graph]) || !(subjects = graphItem.subjects[subject]) || !(predicates = subjects[predicate]) || !(object in predicates)) @@ -350,18 +381,15 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. *readQuads(subject, predicate, object, graph) { // Convert terms to internal string representation - subject = subject && termToId(subject); - predicate = predicate && termToId(predicate); - object = object && termToId(object); graph = graph && termToId(graph); - const graphs = this._getGraphs(graph), ids = this._ids; + const graphs = this._getGraphs(graph); let content, subjectId, predicateId, objectId; // Translate IRIs to internal index keys. - if (isString(subject) && !(subjectId = ids[subject]) || - isString(predicate) && !(predicateId = ids[predicate]) || - isString(object) && !(objectId = ids[object])) + if (subject && !(subjectId = this._termToNumericId(subject)) || + predicate && !(predicateId = this._termToNumericId(predicate)) || + object && !(objectId = this._termToNumericId(object))) return; for (const graphId in graphs) { @@ -408,18 +436,15 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. countQuads(subject, predicate, object, graph) { // Convert terms to internal string representation - subject = subject && termToId(subject); - predicate = predicate && termToId(predicate); - object = object && termToId(object); graph = graph && termToId(graph); - const graphs = this._getGraphs(graph), ids = this._ids; + const graphs = this._getGraphs(graph); let count = 0, content, subjectId, predicateId, objectId; // Translate IRIs to internal index keys. - if (isString(subject) && !(subjectId = ids[subject]) || - isString(predicate) && !(predicateId = ids[predicate]) || - isString(object) && !(objectId = ids[object])) + if (subject && !(subjectId = this._termToNumericId(subject)) || + predicate && !(predicateId = this._termToNumericId(predicate)) || + object && !(objectId = this._termToNumericId(object))) return 0; for (const graphId in graphs) { @@ -490,17 +515,15 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. forSubjects(callback, predicate, object, graph) { // Convert terms to internal string representation - predicate = predicate && termToId(predicate); - object = object && termToId(object); graph = graph && termToId(graph); - const ids = this._ids, graphs = this._getGraphs(graph); + const graphs = this._getGraphs(graph); let content, predicateId, objectId; callback = this._uniqueEntities(callback); // Translate IRIs to internal index keys. - if (isString(predicate) && !(predicateId = ids[predicate]) || - isString(object) && !(objectId = ids[object])) + if (predicate && !(predicateId = this._termToNumericId(predicate)) || + object && !(objectId = this._termToNumericId(object))) return; for (graph in graphs) { @@ -537,17 +560,15 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. forPredicates(callback, subject, object, graph) { // Convert terms to internal string representation - subject = subject && termToId(subject); - object = object && termToId(object); graph = graph && termToId(graph); - const ids = this._ids, graphs = this._getGraphs(graph); + const graphs = this._getGraphs(graph); let content, subjectId, objectId; callback = this._uniqueEntities(callback); // Translate IRIs to internal index keys. - if (isString(subject) && !(subjectId = ids[subject]) || - isString(object) && !(objectId = ids[object])) + if (subject && !(subjectId = this._termToNumericId(subject)) || + object && !(objectId = this._termToNumericId(object))) return; for (graph in graphs) { @@ -584,17 +605,15 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. forObjects(callback, subject, predicate, graph) { // Convert terms to internal string representation - subject = subject && termToId(subject); - predicate = predicate && termToId(predicate); graph = graph && termToId(graph); - const ids = this._ids, graphs = this._getGraphs(graph); + const graphs = this._getGraphs(graph); let content, subjectId, predicateId; callback = this._uniqueEntities(callback); // Translate IRIs to internal index keys. - if (isString(subject) && !(subjectId = ids[subject]) || - isString(predicate) && !(predicateId = ids[predicate])) + if (subject && !(subjectId = this._termToNumericId(subject)) || + predicate && !(predicateId = this._termToNumericId(predicate))) return; for (graph in graphs) { From 91f1fa1564fb736bead2ebacb1ded51be2b3d371 Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:08:11 +0100 Subject: [PATCH 4/5] chore: add performance test for quoted triples --- perf/N3StoreStar-perf.js | 118 ++++++++++++++++++++++++++++++++++ perf/N3StoreStarViews-perf.js | 118 ++++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 perf/N3StoreStar-perf.js create mode 100644 perf/N3StoreStarViews-perf.js diff --git a/perf/N3StoreStar-perf.js b/perf/N3StoreStar-perf.js new file mode 100644 index 00000000..67e9d1a9 --- /dev/null +++ b/perf/N3StoreStar-perf.js @@ -0,0 +1,118 @@ +#!/usr/bin/env node +const N3 = require('..'); +const assert = require('assert'); + +console.log('N3Store performance test'); + +const prefix = 'http://example.org/#'; + +/* Test triples */ +const dim = Number.parseInt(process.argv[2], 10) || 22; +const dimSquared = dim * dim; +const dimCubed = dimSquared * dim; +const dimToTheFour = dimCubed * dim; +const dimToTheFive = dimToTheFour * dim; + +const store = new N3.Store(); +let TEST = `- Adding ${dimToTheFive} triples to the default graph`; +console.time(TEST); +let i, j, k, l, m; +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < dim; l++) + for (m = 0; m < dim; m++) + store.addQuad( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + N3.DataFactory.namedNode(prefix + l), + N3.DataFactory.namedNode(prefix + m) + ); +console.timeEnd(TEST); + +console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`); + +TEST = `- Finding all ${dimToTheFive} triples in the default graph ${dimSquared * 1} times (0 variables)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < dim; l++) + for (m = 0; m < dim; m++) + assert.equal(store.getQuads( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + N3.DataFactory.namedNode(prefix + l), + N3.DataFactory.namedNode(prefix + m) + ).length, 1); +console.timeEnd(TEST); + +TEST = `- Finding all ${dimCubed} triples in the default graph ${dimSquared * 2} times (1 variable subject)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + assert.equal(store.getQuads(null, N3.DataFactory.namedNode(prefix + i), N3.DataFactory.namedNode(prefix + j)).length, dimCubed); +console.timeEnd(TEST); + +TEST = `- Finding all ${0} triples in the default graph ${dimSquared * 2} times (1 variable predicate)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + assert.equal(store.getQuads(N3.DataFactory.namedNode(prefix + i), null, N3.DataFactory.namedNode(prefix + j)).length, 0); +console.timeEnd(TEST); + +TEST = `- Finding all ${dim} triples in the default graph ${dimSquared * 4} times (1 variable predicate)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < dim; l++) + assert.equal(store.getQuads(N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), null, N3.DataFactory.namedNode(prefix + l)).length, dim); +console.timeEnd(TEST); + +TEST = `- Finding all ${0} triples in the default graph ${dimSquared * 2} times (1 variable object)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + assert.equal(store.getQuads(N3.DataFactory.namedNode(prefix + i), N3.DataFactory.namedNode(prefix + j), null).length, 0); +console.timeEnd(TEST); + +TEST = `- Finding all ${dim} triples in the default graph ${dimSquared * 4} times (1 variable objects)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < dim; l++) + assert.equal(store.getQuads(N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), N3.DataFactory.namedNode(prefix + l), null).length, dim); +console.timeEnd(TEST); + +TEST = `- Finding all ${dimSquared} triples in the default graph ${dimSquared * 1} times (2 variables)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + assert.equal(store.getQuads( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + null, + null + ).length, + dimSquared); +console.timeEnd(TEST); diff --git a/perf/N3StoreStarViews-perf.js b/perf/N3StoreStarViews-perf.js new file mode 100644 index 00000000..c3a8e08e --- /dev/null +++ b/perf/N3StoreStarViews-perf.js @@ -0,0 +1,118 @@ +#!/usr/bin/env node +const N3 = require('../lib'); +const assert = require('assert'); + +console.log('N3Store performance test'); + +const prefix = 'http://example.org/#'; + +/* Test triples */ +const dim = Number.parseInt(process.argv[2], 10) || 64; +const dimSquared = dim * dim; +const dimCubed = dimSquared * dim; +const dimToTheFour = dimCubed * dim; +const dimToTheFive = dimToTheFour * dim; + +const store = new N3.Store(); +let TEST = `- Adding ${dimToTheFive} triples to the default graph`; +console.time(TEST); +let i, j, k, l, m; +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < 3; l++) + for (m = 0; m < 3; m++) + store.addQuad( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + N3.DataFactory.namedNode(prefix + l), + N3.DataFactory.namedNode(prefix + m) + ); +console.timeEnd(TEST); + +console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`); + +TEST = `- Finding all ${dimToTheFive} triples in the default graph ${dimSquared * 1} times (0 variables)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < 3; l++) + for (m = 0; m < 3; m++) + assert.equal(store.getQuads( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + N3.DataFactory.namedNode(prefix + l), + N3.DataFactory.namedNode(prefix + m) + ).length, 1); +console.timeEnd(TEST); + +TEST = `- Finding all ${dimCubed} triples in the default graph ${dimSquared * 2} times (1 variable subject)`; +console.time(TEST); +for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + assert.equal(store.getQuads(null, N3.DataFactory.namedNode(prefix + i), N3.DataFactory.namedNode(prefix + j)).length, dimCubed); +console.timeEnd(TEST); + +TEST = `- Finding all ${0} triples in the default graph ${dimSquared * 2} times (1 variable predicate)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + assert.equal(store.getQuads(N3.DataFactory.namedNode(prefix + i), null, N3.DataFactory.namedNode(prefix + j)).length, 0); +console.timeEnd(TEST); + +TEST = `- Finding all ${3} triples in the default graph ${dimCubed * 3} times (1 variable predicate)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < 3; l++) + assert.equal(store.getQuads(N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), null, N3.DataFactory.namedNode(prefix + l)).length, 3); +console.timeEnd(TEST); + +TEST = `- Finding all ${0} triples in the default graph ${dimSquared * 2} times (1 variable object)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + assert.equal(store.getQuads(N3.DataFactory.namedNode(prefix + i), N3.DataFactory.namedNode(prefix + j), null).length, 0); +console.timeEnd(TEST); + +TEST = `- Finding all ${3} triples in the default graph ${dimCubed * 3} times (1 variable objects)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + for (l = 0; l < 3; l++) + assert.equal(store.getQuads(N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), N3.DataFactory.namedNode(prefix + l), null).length, 3); +console.timeEnd(TEST); + +TEST = `- Finding all ${9} triples in the default graph ${dimCubed} times (2 variables)`; +console.time(TEST); +for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + for (k = 0; k < dim; k++) + assert.equal(store.getQuads( + N3.DataFactory.quad( + N3.DataFactory.namedNode(prefix + i), + N3.DataFactory.namedNode(prefix + j), + N3.DataFactory.namedNode(prefix + k) + ), + null, + null + ).length, + 9); +console.timeEnd(TEST); From 06ffb84d79b4b311a05cc579a471d1b28ba98c6f Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Fri, 20 Oct 2023 20:02:03 +0100 Subject: [PATCH 5/5] Update test/N3Store-test.js --- test/N3Store-test.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/N3Store-test.js b/test/N3Store-test.js index 3c5ab9d0..883198bc 100644 --- a/test/N3Store-test.js +++ b/test/N3Store-test.js @@ -372,8 +372,6 @@ describe('Store', () => { ); }); - // These tests should probably be broken in the future; they are here to serve to use that we should to a mver bump - // at such a time describe('A store with quoted quads', () => { let store; beforeEach(() => {