blob: ac46d1dfab875274daa916a20095096bd73b67c6 [file] [log] [blame] [edit]
const fs = require('fs');
/**
 * Pending simulated file loads. Each entry pairs a path with the callback
 * that is later invoked as `(null, contents)` on success or `(error, null)`
 * on failure.
 *
 * @type {[string, function(any, Buffer<ArrayBufferLike>?): any][]}
 */
const workQueue = [];
// Create the seed file only when it is absent, so that subsequent runs read
// back the same seed instead of drawing a new one.
if (fs.existsSync("rng-seed.txt") === false) {
    const freshSeed = Math.floor(Math.random() * 0x7FFF_FFFF);
    fs.writeFileSync("rng-seed.txt", String(freshSeed));
}
// Is there a built-in way to do seeded random scheduling in Node,
// like Elixir's test harness supports OOTB?
//
// I really hope there is, because (1) I'm not convinced that this finds
// all possible races (2) This is slow.
//
// I'm willing to pull in external dependencies for testing, even though
// I try to avoid them for code I'm distributing.

// Current generator state, initialised from the persisted seed file.
let myRngSeed = Number(fs.readFileSync("rng-seed.txt", "utf8"));
/**
 * Advance the module-level xorshift32 state (`myRngSeed`) by one step and
 * return a non-negative pseudo-random integer.
 *
 * @returns {number} the new state with the sign bit masked off, i.e. a
 *   value in the range [0, 0x7FFF_FFFF].
 */
const myRng = () => {
    // https://en.wikipedia.org/wiki/Xorshift (32-bit variant, shifts 13/17/5)
    //
    // Coerce the state to a 32-bit integer first. Zero is a fixed point of
    // xorshift (and an unparsable seed coerces to zero as well), which would
    // make every call return 0, so substitute a nonzero state in that case.
    let x = myRngSeed | 0;
    if (x === 0) {
        x = 1;
    }
    x ^= x << 13;
    // The reference algorithm works on an unsigned 32-bit word, so this must
    // be a logical shift. An arithmetic `>>` smears the sign bit across the
    // upper bits and the step stops being a bijection.
    x ^= x >>> 17;
    x ^= x << 5;
    myRngSeed = x;
    return x & 0x7FFF_FFFF;
};
/**
 * Drain `workQueue`, servicing the pending loads in an order chosen by
 * `myRng()`.
 *
 * For each chosen entry the file is read synchronously and the callback is
 * invoked as `(null, contents)`. If anything inside that attempt throws --
 * the read, or the callback itself -- the callback is invoked again as
 * `(error, null)`. Entries pushed by a callback while draining are picked up
 * by the same loop.
 */
const stepWorkQueue = () => {
    while (workQueue.length > 0) {
        const slot = myRng() % workQueue.length;
        const [file, callback] = workQueue[slot];
        // Swap-remove: take the tail off the queue and, unless the chosen
        // entry *was* the tail, drop it into the vacated slot.
        const tail = workQueue.pop();
        if (slot !== workQueue.length) {
            workQueue[slot] = tail;
        }
        try {
            callback(null, fs.readFileSync(file));
        } catch (failure) {
            callback(failure, null);
        }
    }
};
// Schedule an initial drain once the current synchronous code has finished.
process.nextTick(stepWorkQueue);
/**
* @import * as stringdex from "./stringdex.d.ts"
*/
const {Stringdex, RoaringBitmap} = require('./stringdex.js');
/**
 * Print the command-line synopsis via `console.log` and terminate the
 * process with exit status 1.
 */
function usage() {
    const synopsis = [
        "Usage: node nodesearch.js (exact|lev|substring|prefix) [column]",
        " node nodesearch.js lookup [key-column] [value-column]",
    ];
    for (const line of synopsis) {
        console.log(line);
    }
    process.exit(1);
}
// When STRINGDEX_JS_SEARCH_BREAK is set to a non-empty value, a lone "1"
// line is written to stdout ahead of any other output.
// NOTE(review): presumably a hook for the surrounding test suite -- confirm.
if (process.env.STRINGDEX_JS_SEARCH_BREAK) {
    process.stdout.write("1\n");
}
// First CLI argument: which kind of search to run.
const [, , searchKind] = process.argv;
// The query is everything available on file descriptor 0 (stdin), kept as
// raw bytes.
const searchQuery = fs.readFileSync(0);
/**
 * Callbacks handed to `loadRoot`; stays `null` until `loadRoot` has run.
 *
 * @type {stringdex.Callbacks?}
 */
let databaseCallbacks = null;
/**
 * Thenable for the loaded database. The hooks passed to
 * `Stringdex.loadDatabase` do not read files directly: each one pushes a
 * `[path, callback]` pair onto `workQueue` and schedules `stepWorkQueue`,
 * which later reads the file and invokes the callback.
 *
 * On a successful read the file's contents are evaluated as JavaScript; on
 * failure the matching `err_*` callback is notified.
 *
 * NOTE: the loaded files are run through `eval`, so this harness must only
 * be pointed at index files that are trusted.
 */
const database = Stringdex.loadDatabase({
    loadRoot: callbacks => {
        // Publish every own callback as a global, presumably so that the
        // scripts eval'd below can call them by bare name.
        for (const key in callbacks) {
            if (Object.hasOwn(callbacks, key)) {
                // @ts-ignore
                global[key] = callbacks[key];
            }
        }
        // Remember the callbacks and request the root index exactly once.
        // (This used to sit inside the loop above, which queued the root
        // once per callback key and never when there were no keys.)
        databaseCallbacks = callbacks;
        workQueue.push([
            `search-index.js`,
            (err, data) => {
                if (data) {
                    eval(data.toString("utf8"));
                } else if (databaseCallbacks) {
                    databaseCallbacks.err_rr_(err);
                }
            },
        ]);
        process.nextTick(stepWorkQueue);
    },
    loadTreeByHash: hashHex => {
        // Tree nodes live at search.index/<hash>.js.
        workQueue.push([
            `search.index/${hashHex}.js`,
            (err, data) => {
                if (data) {
                    eval(data.toString("utf8"));
                } else if (databaseCallbacks) {
                    databaseCallbacks.err_rn_(hashHex, err);
                }
            },
        ]);
        process.nextTick(stepWorkQueue);
    },
    loadDataByNameAndHash: (name, hashHex) => {
        // Column data lives at search.data/<name>/<hash>.js.
        workQueue.push([
            `search.data/${name}/${hashHex}.js`,
            (err, data) => {
                if (data) {
                    eval(data.toString("utf8"));
                } else if (databaseCallbacks) {
                    databaseCallbacks.err_rd_(hashHex, err);
                }
            },
        ]);
        process.nextTick(stepWorkQueue);
    },
});
// Once the database has loaded, run the requested search and write the
// output to stdout. A rejection (from loading or from the handler below) is
// passed to console.log.
database.then(async db => {
    const argc = process.argv.length;
    /** @type {stringdex.RoaringBitmap} */
    let matches = new RoaringBitmap(null);
    if (argc < 4) {
        usage();
    }
    const column = db.getData(process.argv[3]);
    if (column === undefined) {
        throw new Error("failed to load search tree");
    }
    // The exact-key search runs up front for every mode; `node` is falsy
    // when the query was not found.
    const node = await column.search(searchQuery);

    if (searchKind === "exact" || searchKind === "prefix" || searchKind === "substring") {
        if (argc !== 4) {
            usage();
        }
        if (node) {
            if (searchKind === "exact") {
                matches = node.matches();
            } else {
                const stream = searchKind === "substring" ? node.substringMatches() : node.prefixMatches();
                // Fold every yielded set into a single bitmap.
                for await (const part of stream) {
                    matches = matches.union(part);
                }
            }
        }
    } else if (searchKind === "lev") {
        if (argc !== 4) {
            usage();
        }
        const stream = column.searchLev(searchQuery);
        for await (const candidate of stream) {
            matches = matches.union(candidate.matches());
        }
    } else if (searchKind === "lookup") {
        if (argc !== 5) {
            usage();
        }
        if (node) {
            matches = node.matches();
        }
        const valueColumn = db.getData(process.argv[4]);
        if (!valueColumn) {
            throw new Error("failed to load data");
        }
        // Lookup mode prints the base64 of each matching row's value, one
        // per line, and finishes without printing the row ids.
        for (const id of matches.entries()) {
            const value = await valueColumn.at(id);
            if (value === undefined) {
                throw new Error(`weird ${id} missing`);
            }
            process.stdout.write(uint8ArrayToBase64(value));
            process.stdout.write("\n");
        }
        return;
    } else {
        usage();
        return;
    }

    // Every other mode prints the matching ids, one per line.
    for (const hit of matches.entries()) {
        process.stdout.write(hit.toString());
        process.stdout.write("\n");
    }
}, console.log);
// https://github.com/tc39/proposal-arraybuffer-base64/blob/main/playground/polyfill-core.mjs
/**
 * Encode bytes as standard (`+` / `/`) base64 with `=` padding.
 *
 * @param {Uint8Array} arr bytes to encode
 * @returns {string} the base64 text; the empty string for empty input
 */
function uint8ArrayToBase64(arr) {
    const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
    // Map the 6-bit group that starts `shift` bits up in `bits` to its character.
    const sextet = (bits, shift) => alphabet[(bits >> shift) & 63];

    const leftover = arr.length % 3;
    const wholeEnd = arr.length - leftover;
    let encoded = '';

    // Complete 3-byte groups become four characters each.
    for (let pos = 0; pos < wholeEnd; pos += 3) {
        const bits = (arr[pos] << 16) + (arr[pos + 1] << 8) + arr[pos + 2];
        encoded += sextet(bits, 18) + sextet(bits, 12) + sextet(bits, 6) + sextet(bits, 0);
    }

    // A trailing group of two bytes or one byte is padded out to four characters.
    if (leftover === 2) {
        const bits = (arr[wholeEnd] << 16) + (arr[wholeEnd + 1] << 8);
        encoded += sextet(bits, 18) + sextet(bits, 12) + sextet(bits, 6) + '=';
    } else if (leftover === 1) {
        const bits = arr[wholeEnd] << 16;
        encoded += sextet(bits, 18) + sextet(bits, 12) + '==';
    }
    return encoded;
}