In this article, I explore methods for finding differences between sets.
Fake Data Generation
The generateFakeData
function allows you to create a custom set containing a mix of active and default channels.
/**
 * Draw a pseudo-random integer from the half-open interval [min, max).
 *
 * A negative `min` is treated as 0. `min` is rounded up and `max` is rounded
 * down before the draw, so fractional bounds never produce a value outside
 * the requested interval.
 *
 * @param {number} min Inclusive lower bound (negative values become 0).
 * @param {number} max Exclusive upper bound.
 * @returns {number} An integer n with ceil(min) <= n < floor(max).
 */
function getRandomInt(min, max) {
  const lower = Math.ceil(min < 0 ? 0 : min);
  const upper = Math.floor(max);
  const span = upper - lower;
  // Math.random() is in [0, 1), so the upper bound is never reached.
  return Math.floor(Math.random() * span + lower);
}
/**
 * Build a Set of fake channel names of the form `prices@coin_<n>`.
 *
 * Channels 1..defaultChannels are always included. A random number of extra
 * channels is then picked, without repetition, from
 * defaultChannels + 1..totalChannels. That number is drawn by getRandomInt
 * from [activeChannels - defaultChannels, activeChannels + defaultChannels);
 * if it exceeds the number of extra channels available, all of them are used.
 *
 * Callers are expected to pass activeChannels < totalChannels and
 * activeChannels - defaultChannels > 0; neither is enforced here.
 *
 * @param {number} [totalChannels=600] Highest channel number that may appear.
 * @param {number} [activeChannels=100] Midpoint of the range the extra-channel count is drawn from.
 * @param {number} [defaultChannels=50] Number of always-present channels.
 * @returns {Set<string>} The generated channel names.
 */
const generateFakeData = (
  totalChannels = 600,
  activeChannels = 100,
  defaultChannels = 50
) => {
  // Ids 1..defaultChannels are always present.
  const defaultIds = Array.from({ length: defaultChannels }, (_, i) => i + 1);

  // Ids defaultChannels + 1..totalChannels. The length must be the size of
  // that range, otherwise ids larger than totalChannels would be generated.
  const extraCount = Math.max(0, totalChannels - defaultChannels);
  const extraIds = Array.from(
    { length: extraCount },
    (_, i) => i + defaultChannels + 1
  );

  // How many extra ids to keep: a random count in
  // [activeChannels - defaultChannels, activeChannels + defaultChannels).
  const pickCount = getRandomInt(
    activeChannels - defaultChannels,
    activeChannels + defaultChannels
  );

  // Fisher–Yates shuffle: unbiased, unlike sorting with a random comparator.
  for (let i = extraIds.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [extraIds[i], extraIds[j]] = [extraIds[j], extraIds[i]];
  }
  const pickedIds = extraIds.slice(0, pickCount);

  // Turn every id into its channel name and collect the names in a Set.
  const names = [...defaultIds, ...pickedIds].map(
    (id) => `prices@coin_${id}`
  );
  return new Set(names);
};
// Example sizes handed to generateFakeData.
let totalChannels = 50000;
let activeChannels = 25000;
let defaultChannels = 500;

// The generated set of channel names for those sizes.
let currentChannels = generateFakeData(totalChannels, activeChannels, defaultChannels);
Finding Differences Between Sets
Say we want to retrieve the new items in arr_j
(those that are not in arr_i), as well as the items in arr_i
that are not in arr_j,
as separate output arrays. There are three main ways to do this: the Array method, the Set method, or the Set method with a generator.
// Two ten-letter sample arrays; they share seven letters and differ in three each.
var arr_i = Array.from("abcdefghij");
var arr_j = Array.from("acdfghjkln");
The answers should be the new items in arr_j as ['k', 'l', 'n']
as well as the items in arr_i that are not in arr_j as ['b', 'e', 'i']
// Wrap each sample array in a Set so the Set-based methods can be used.
var [set_i, set_j] = [arr_i, arr_j].map((items) => new Set(items));
/**
 * Array method: diff two arrays using filter + includes.
 *
 * @param {Array} arr1 The first array.
 * @param {Array} arr2 The second array.
 * @returns {{turn_on: Array, turn_off: Array}} `turn_on` holds the items of
 *   arr2 that arr1 lacks; `turn_off` holds the items of arr1 that arr2 lacks.
 */
const changes = (arr1, arr2) => {
  // Builds a predicate that is true for items the given array does not contain.
  const missingFrom = (haystack) => (item) => !haystack.includes(item);

  return {
    turn_on: arr2.filter(missingFrom(arr1)),
    turn_off: arr1.filter(missingFrom(arr2)),
  };
};
/**
 * Set method: diff two Sets using has() lookups.
 *
 * @param {Set} set1 The first set.
 * @param {Set} set2 The second set.
 * @returns {{turn_on: Set, turn_off: Set}} `turn_on` holds the members of
 *   set2 that set1 lacks; `turn_off` holds the members of set1 that set2 lacks.
 */
const setChanges = (set1, set2) => {
  // Collects, in iteration order, the values of `source` absent from `excluded`.
  const difference = (source, excluded) => {
    const remaining = new Set();
    for (const value of source) {
      if (!excluded.has(value)) {
        remaining.add(value);
      }
    }
    return remaining;
  };

  return {
    turn_on: difference(set2, set1),
    turn_off: difference(set1, set2),
  };
};
/**
 * Set method with generator (after @koblas): lazily yield every value of
 * `setB` that is not a member of `setA`, in `setB`'s iteration order.
 *
 * Neither argument is modified. Membership is checked with `has`; using
 * `delete` as the membership test would also remove every shared value from
 * the caller's `setA`.
 *
 * @param {Set} setA Values to exclude.
 * @param {Iterable} setB Values to scan.
 * @yields Each value of `setB` that `setA` does not contain.
 */
function* setMinus(setA, setB) {
  for (const value of setB) {
    if (!setA.has(value)) {
      yield value;
    }
  }
}
/**
 * Generator method: diff two Sets by draining setMinus in both directions.
 *
 * @param {Set} set1 The first set.
 * @param {Set} set2 The second set.
 * @returns {{turn_on: Array, turn_off: Array}} `turn_on` holds the members of
 *   set2 that set1 lacks; `turn_off` holds the members of set1 that set2 lacks.
 */
const changesGenerator = (set1, set2) => {
  // The two passes are kept in this order on purpose: if setMinus has side
  // effects on its first argument, swapping them would change which input
  // is touched.
  const removed = [...setMinus(set2, set1)];
  const added = [...setMinus(set1, set2)];
  return { turn_on: added, turn_off: removed };
};
All three methods find the same differences (setChanges returns them as Sets rather than arrays):
{ turn_on: [ 'k', 'l', 'n' ], turn_off: [ 'b', 'e', 'i' ] }
Speed matters when filtering sets. Below, each method is timed on two random arrays of integers drawn from the range [0, 10000),
each containing 5000
items:
// Benchmark input: two arrays of 5000 random integers in [0, 10000).
// `var` rather than `let`: arr_i and arr_j were already declared with `var`
// for the small example, and redeclaring a `var` binding with `let` in the
// same scope is a SyntaxError.
var arr_i = Array.from({ length: 5000 }, () =>
  Math.floor(Math.random() * 10000)
);
var arr_j = Array.from({ length: 5000 }, () =>
  Math.floor(Math.random() * 10000)
);

// Set versions of the same data (repeated numbers collapse to one entry).
var set_i = new Set(arr_i);
var set_j = new Set(arr_j);

// Time each of the three approaches once; console.timeEnd prints the elapsed time.
console.time("Array method");
changes(arr_i, arr_j);
console.timeEnd("Array method");

console.time("Set method");
setChanges(set_i, set_j);
console.timeEnd("Set method");

console.time("Generator method");
changesGenerator(set_i, set_j);
console.timeEnd("Generator method");
Returns:
Array method: 36.894ms
Set method: 1.14ms
Generator method: 2.155ms
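Clearly, the Array method took a lot longer: it calls includes, which scans the other array, once for every element, whereas the Set-based methods look each value up without scanning the whole collection.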