Skip to content

fix: indexing quoted triples #369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions perf/N3StoreStar-perf.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env node
const N3 = require('..');
const assert = require('assert');

// Benchmark: N3.Store indexing and lookup of quads whose subject is a quoted
// (RDF-star) triple. Every combination of `dim` IRIs forms a quoted triple
// << i j k >>, and each quoted triple is asserted with every (predicate, object)
// pair, giving dim^5 quads in the default graph.
// An optional first command-line argument overrides `dim`.
console.log('N3Store performance test');

const prefix = 'http://example.org/#';
const { namedNode, quad } = N3.DataFactory;

/* Test triples */
const dim = Number.parseInt(process.argv[2], 10) || 22;
const dimSquared = dim * dim;
const dimCubed = dimSquared * dim;
const dimToTheFour = dimCubed * dim;
const dimToTheFive = dimToTheFour * dim;

// Creates the IRI term for index `n`.
const node = n => namedNode(`${prefix}${n}`);
// Creates the quoted triple << i j k >> that is used in subject position.
// Terms are deliberately re-created on every call so term construction stays
// inside the timed sections, as in the hand-written loops.
const quotedTriple = (i, j, k) => quad(node(i), node(j), node(k));

const store = new N3.Store();
let TEST = `- Adding ${dimToTheFive} triples to the default graph`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < dim; l++) {
        for (let m = 0; m < dim; m++)
          store.addQuad(quotedTriple(i, j, k), node(l), node(m));
      }
    }
  }
}
console.timeEnd(TEST);

console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`);

// Fully bound pattern: one lookup per stored quad, each matching exactly one quad.
TEST = `- Finding all ${1} triples in the default graph ${dimToTheFive} times (0 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < dim; l++) {
        for (let m = 0; m < dim; m++)
          assert.strictEqual(store.getQuads(quotedTriple(i, j, k), node(l), node(m)).length, 1);
      }
    }
  }
}
console.timeEnd(TEST);

// Unbound subject: every quoted triple carries each (predicate, object) pair.
TEST = `- Finding all ${dimCubed} triples in the default graph ${dimSquared} times (1 variable subject)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(null, node(i), node(j)).length, dimCubed);
}
console.timeEnd(TEST);

// Plain IRI subjects never occur in the store (all subjects are quoted triples),
// so these lookups must come back empty.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), null, node(j)).length, 0);
}
console.timeEnd(TEST);

TEST = `- Finding all ${dim} triples in the default graph ${dimToTheFour} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < dim; l++)
        assert.strictEqual(store.getQuads(quotedTriple(i, j, k), null, node(l)).length, dim);
    }
  }
}
console.timeEnd(TEST);

// Again a plain IRI subject: no quad can match.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), node(j), null).length, 0);
}
console.timeEnd(TEST);

TEST = `- Finding all ${dim} triples in the default graph ${dimToTheFour} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < dim; l++)
        assert.strictEqual(store.getQuads(quotedTriple(i, j, k), node(l), null).length, dim);
    }
  }
}
console.timeEnd(TEST);

// Only the quoted subject is bound: all dim^2 (predicate, object) pairs match.
TEST = `- Finding all ${dimSquared} triples in the default graph ${dimCubed} times (2 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++)
      assert.strictEqual(store.getQuads(quotedTriple(i, j, k), null, null).length, dimSquared);
  }
}
console.timeEnd(TEST);
118 changes: 118 additions & 0 deletions perf/N3StoreStarViews-perf.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env node
const N3 = require('../lib');
const assert = require('assert');

// Benchmark: N3.Store indexing and lookup of quads whose subject is a quoted
// (RDF-star) triple, with a small fixed number of statements per quoted triple.
// Every combination of `dim` IRIs forms a quoted triple << i j k >>, and each
// quoted triple is asserted with `fanOut` predicates x `fanOut` objects, giving
// dim^3 * fanOut^2 quads in the default graph.
// An optional first command-line argument overrides `dim`.
console.log('N3Store performance test');

const prefix = 'http://example.org/#';
const { namedNode, quad } = N3.DataFactory;

/* Test triples */
const dim = Number.parseInt(process.argv[2], 10) || 64;
const dimSquared = dim * dim;
const dimCubed = dimSquared * dim;
// Number of distinct predicates, and of distinct objects, attached to each quoted triple.
const fanOut = 3;
const fanOutSquared = fanOut * fanOut;
const totalQuads = dimCubed * fanOutSquared;

// Creates the IRI term for index `n`.
const node = n => namedNode(`${prefix}${n}`);
// Creates the quoted triple << i j k >> that is used in subject position.
// Terms are deliberately re-created on every call so term construction stays
// inside the timed sections, as in the hand-written loops.
const quotedTriple = (i, j, k) => quad(node(i), node(j), node(k));

const store = new N3.Store();
let TEST = `- Adding ${totalQuads} triples to the default graph`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < fanOut; l++) {
        for (let m = 0; m < fanOut; m++)
          store.addQuad(quotedTriple(i, j, k), node(l), node(m));
      }
    }
  }
}
console.timeEnd(TEST);

console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`);

// Fully bound pattern: one lookup per stored quad, each matching exactly one quad.
TEST = `- Finding all ${1} triples in the default graph ${totalQuads} times (0 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < fanOut; l++) {
        for (let m = 0; m < fanOut; m++)
          assert.strictEqual(store.getQuads(quotedTriple(i, j, k), node(l), node(m)).length, 1);
      }
    }
  }
}
console.timeEnd(TEST);

// Unbound subject: every quoted triple carries each (predicate, object) pair.
TEST = `- Finding all ${dimCubed} triples in the default graph ${fanOutSquared} times (1 variable subject)`;
console.time(TEST);
for (let i = 0; i < fanOut; i++) {
  for (let j = 0; j < fanOut; j++)
    assert.strictEqual(store.getQuads(null, node(i), node(j)).length, dimCubed);
}
console.timeEnd(TEST);

// Plain IRI subjects never occur in the store (all subjects are quoted triples),
// so these lookups must come back empty.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), null, node(j)).length, 0);
}
console.timeEnd(TEST);

TEST = `- Finding all ${fanOut} triples in the default graph ${dimCubed * fanOut} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < fanOut; l++)
        assert.strictEqual(store.getQuads(quotedTriple(i, j, k), null, node(l)).length, fanOut);
    }
  }
}
console.timeEnd(TEST);

// Again a plain IRI subject: no quad can match.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), node(j), null).length, 0);
}
console.timeEnd(TEST);

TEST = `- Finding all ${fanOut} triples in the default graph ${dimCubed * fanOut} times (1 variable objects)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++) {
      for (let l = 0; l < fanOut; l++)
        assert.strictEqual(store.getQuads(quotedTriple(i, j, k), node(l), null).length, fanOut);
    }
  }
}
console.timeEnd(TEST);

// Only the quoted subject is bound: all fanOut^2 (predicate, object) pairs match.
TEST = `- Finding all ${fanOutSquared} triples in the default graph ${dimCubed} times (2 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++) {
  for (let j = 0; j < dim; j++) {
    for (let k = 0; k < dim; k++)
      assert.strictEqual(store.getQuads(quotedTriple(i, j, k), null, null).length, fanOutSquared);
  }
}
console.timeEnd(TEST);
55 changes: 31 additions & 24 deletions src/N3DataFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ let DEFAULTGRAPH;
let _blankNodeCounter = 0;

const escapedLiteral = /^"(.*".*)(?="[^"]*$)/;
const quadId = /^<<("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ?("(?:""|[^"])*"[^ ]*|[^ ]+)?>>$/;

// ## DataFactory singleton
const DataFactory = {
Expand Down Expand Up @@ -188,9 +187,12 @@ export class DefaultGraph extends Term {
// ## DefaultGraph singleton
DEFAULTGRAPH = new DefaultGraph();


// ### Constructs a term from the given internal string ID
export function termFromId(id, factory) {
// The third 'nested' parameter of this function is to aid
// with recursion over nested terms. It should not be used
// by consumers of this library.
// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725
export function termFromId(id, factory, nested) {
factory = factory || DataFactory;

// Falsy value or empty string indicate the default graph
Expand All @@ -215,21 +217,28 @@ export function termFromId(id, factory) {
return factory.literal(id.substr(1, endPos - 1),
id[endPos + 1] === '@' ? id.substr(endPos + 2)
: factory.namedNode(id.substr(endPos + 3)));
case '<':
const components = quadId.exec(id);
return factory.quad(
termFromId(unescapeQuotes(components[1]), factory),
termFromId(unescapeQuotes(components[2]), factory),
termFromId(unescapeQuotes(components[3]), factory),
components[4] && termFromId(unescapeQuotes(components[4]), factory)
);
case '[':
id = JSON.parse(id);
break;
default:
return factory.namedNode(id);
if (!nested || !Array.isArray(id)) {
return factory.namedNode(id);
}
}
return factory.quad(
termFromId(id[0], factory, true),
termFromId(id[1], factory, true),
termFromId(id[2], factory, true),
id[3] && termFromId(id[3], factory, true)
);
}

// ### Constructs an internal string ID from the given term or ID string
export function termToId(term) {
// The second 'nested' parameter of this function is to aid
// with recursion over nested terms. It should not be used
// by consumers of this library.
// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725
export function termToId(term, nested) {
if (typeof term === 'string')
return term;
if (term instanceof Term && term.termType !== 'Quad')
Expand All @@ -247,17 +256,15 @@ export function termToId(term) {
term.language ? `@${term.language}` :
(term.datatype && term.datatype.value !== xsd.string ? `^^${term.datatype.value}` : '')}`;
case 'Quad':
// To identify RDF* quad components, we escape quotes by doubling them.
// This avoids the overhead of backslash parsing of Turtle-like syntaxes.
return `<<${
escapeQuotes(termToId(term.subject))
} ${
escapeQuotes(termToId(term.predicate))
} ${
escapeQuotes(termToId(term.object))
}${
(isDefaultGraph(term.graph)) ? '' : ` ${termToId(term.graph)}`
}>>`;
const res = [
termToId(term.subject, true),
termToId(term.predicate, true),
termToId(term.object, true),
];
if (!isDefaultGraph(term.graph)) {
res.push(termToId(term.graph, true));
}
return nested ? res : JSON.stringify(res);
default: throw new Error(`Unexpected termType: ${term.termType}`);
}
}
Expand Down
Loading