JavaScript Sets: Techniques and Performance Tips

Published: October 22, 2021

In this article, I explore methods for finding differences between sets.

Fake Data Generation

The generateFakeData function allows you to create a custom set containing a mix of active and default channels.

generateFakeData.js
/**
 * Returns a pseudo-random integer in the half-open range [min, max).
 *
 * A negative `min` is treated as 0. `min` is rounded up and `max` is rounded
 * down before the value is drawn.
 *
 * @param {number} min - Inclusive lower bound (clamped to 0 when negative).
 * @param {number} max - Exclusive upper bound.
 * @returns {number} An integer n with min <= n < max.
 */
function getRandomInt(min, max) {
  const lower = Math.ceil(min < 0 ? 0 : min);
  const upper = Math.floor(max);
  const span = upper - lower;
  // Scaling Math.random() by the span keeps the upper bound exclusive.
  return Math.floor(Math.random() * span + lower);
}
 
/**
 * Builds a Set of fake channel names of the form `prices@coin_<id>`.
 *
 * The result always contains the default channels (ids 1..defaultChannels)
 * plus a random selection of the remaining ids
 * (defaultChannels + 1..totalChannels). The size of that selection is drawn
 * with getRandomInt from [activeChannels - defaultChannels,
 * activeChannels + defaultChannels); getRandomInt clamps a negative lower
 * bound to 0, and the selection can never exceed the ids that are available.
 *
 * @param {number} [totalChannels=600] - Highest channel id that may appear.
 * @param {number} [activeChannels=100] - Centre of the range the number of
 *   extra channels is drawn from.
 * @param {number} [defaultChannels=50] - Number of always-present channels.
 * @returns {Set<string>} The generated channel names.
 */
const generateFakeData = (
  totalChannels = 600,
  activeChannels = 100,
  defaultChannels = 50
) => {
  // Default channels are always included: ids 1..defaultChannels.
  const defaultIds = Array.from({ length: defaultChannels }, (_, i) => i + 1);

  // Candidate extra ids: defaultChannels + 1..totalChannels. The length is
  // the number of ids left after the defaults, so no id exceeds totalChannels.
  const extraCount = Math.max(0, totalChannels - defaultChannels);
  const extraIds = Array.from(
    { length: extraCount },
    (_, i) => i + defaultChannels + 1
  );

  // How many extra ids to keep.
  const pickCount = getRandomInt(
    activeChannels - defaultChannels,
    activeChannels + defaultChannels
  );

  // Fisher-Yates shuffle: unbiased, unlike sorting with a random comparator,
  // which hands sort() an inconsistent ordering.
  for (let i = extraIds.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [extraIds[i], extraIds[j]] = [extraIds[j], extraIds[i]];
  }
  const pickedIds = extraIds.slice(0, pickCount);

  // Prefix every id and collect the names into a Set.
  return new Set(
    [...defaultIds, ...pickedIds].map((id) => `prices@coin_${id}`)
  );
};
 
// Example invocation: the three values below are passed, in order, as
// generateFakeData's totalChannels, activeChannels and defaultChannels.
let totalChannels = 50000;
let activeChannels = 25000;
let defaultChannels = 500;
// currentChannels holds whatever generateFakeData returns for these inputs.
let currentChannels = generateFakeData(
  totalChannels,
  activeChannels,
  defaultChannels
);

Finding Differences Between Sets

Say we want to retrieve the new items in arr_j, as well as the items in arr_i that are not in arr_j, as separate output arrays. There are three main ways to do this: the Array method, the Set method, or the Set method with a generator.

// Sample inputs. Both arrays contain "a", "c", "d", "f", "g", "h", "j";
// only arr_i contains "b", "e", "i" and only arr_j contains "k", "l", "n".
var arr_i = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"];
var arr_j = ["a", "c", "d", "f", "g", "h", "j", "k", "l", "n"];

The expected results are the new items in arr_j, ['k', 'l', 'n'], and the items in arr_i that are not in arr_j, ['b', 'e', 'i'].

// Convert the sample arrays to Sets (a Set keeps each distinct value once).
var set_i = new Set(arr_i);
var set_j = new Set(arr_j);
 
/**
 * Array method: compares two arrays using Array.prototype.includes.
 *
 * @param {Array} arr1 - The current items.
 * @param {Array} arr2 - The new items.
 * @returns {{turn_on: Array, turn_off: Array}} `turn_on` holds the items of
 *   arr2 that are not in arr1; `turn_off` holds the items of arr1 that are
 *   not in arr2.
 */
const changes = (arr1, arr2) => {
  // Builds a filter predicate that keeps items absent from `haystack`.
  const absentFrom = (haystack) => (item) => haystack.includes(item) === false;
  return {
    turn_on: arr2.filter(absentFrom(arr1)),
    turn_off: arr1.filter(absentFrom(arr2)),
  };
};
 
/**
 * Set method: compares two Sets using Set.prototype.has.
 *
 * @param {Set} set1 - The current items.
 * @param {Set} set2 - The new items.
 * @returns {{turn_on: Set, turn_off: Set}} `turn_on` holds the values of
 *   set2 that are not in set1; `turn_off` holds the values of set1 that are
 *   not in set2.
 */
const setChanges = (set1, set2) => {
  // Collects the values of `source` that `excluded` does not contain.
  const difference = (source, excluded) => {
    const kept = new Set();
    for (const value of source) {
      if (!excluded.has(value)) {
        kept.add(value);
      }
    }
    return kept;
  };

  const turn_on = difference(set2, set1);
  const turn_off = difference(set1, set2);
  return { turn_on, turn_off };
};
 
/**
 * Lazily yields every value of `setB` that is not in `setA`.
 *
 * Destructive on `setA`: each value found in both sets is removed from
 * `setA` as the generator is consumed. Pass a copy if the original must be
 * preserved.
 *
 * @param {Set} setA - Set to test against; shared values are deleted from it.
 * @param {Set} setB - Set whose values are iterated.
 * @yields Values of setB that were not present in setA.
 */
function* setMinus(setA, setB) {
  // Using Set method with generator by @koblas
  for (const v of setB.values()) {
    // .delete returns true if the value was in the Set (and removes it);
    // otherwise false.
    if (!setA.delete(v)) {
      yield v;
    }
  }
}

/**
 * Generator method: compares two Sets using setMinus.
 *
 * Neither input Set is modified.
 *
 * @param {Set} set1 - The current items.
 * @param {Set} set2 - The new items.
 * @returns {{turn_on: Array, turn_off: Array}} `turn_on` holds the values of
 *   set2 that are not in set1; `turn_off` holds the values of set1 that are
 *   not in set2.
 */
const changesGenerator = (set1, set2) => {
  // setMinus deletes shared values from its first argument, so run it on a
  // private copy instead of the caller's set2.
  const remaining = new Set(set2);
  const turn_off = Array.from(setMinus(remaining, set1));
  // `remaining` now holds only the values of set2 that are absent from set1,
  // so this pass deletes nothing from set1 and yields all of them.
  const turn_on = Array.from(setMinus(set1, remaining));
  return { turn_on, turn_off };
};

All three methods return the correct result (the Set method returns Sets rather than arrays):

{ turn_on: [ 'k', 'l', 'n' ], turn_off: [ 'b', 'e', 'i' ] }

Speed matters when filtering sets. Timing these on random arrays of 5000 integers drawn from the range [0, 10000):

// Benchmark inputs: two arrays of 5000 random integers, each value produced
// by Math.floor(Math.random() * 10000). Duplicates are possible.
let arr_i = Array.from({ length: 5000 }, () =>
  Math.floor(Math.random() * 10000)
);
let arr_j = Array.from({ length: 5000 }, () =>
  Math.floor(Math.random() * 10000)
);
 
// Set versions of the same data; a Set keeps each distinct value once, so
// these may hold fewer than 5000 entries.
var set_i = new Set(arr_i);
var set_j = new Set(arr_j);
 
// Each console.time / console.timeEnd pair shares a label and brackets a
// single call of one method.
console.time("Array method");
changes(arr_i, arr_j);
console.timeEnd("Array method");
 
console.time("Set method");
setChanges(set_i, set_j);
console.timeEnd("Set method");
 
// NOTE(review): keep this run last. setMinus uses Set.delete, so the
// generator method may mutate the sets it is given; confirm before reordering.
console.time("Generator method");
changesGenerator(set_i, set_j);
console.timeEnd("Generator method");

Returns:

Array method: 36.894ms
Set method: 1.14ms
Generator method: 2.155ms

Clearly, the Array method took a lot longer.