How can I count the number of times different terms occur in a string with JavaScript?
You can use String.prototype.matchAll
// Sample phrases; two of the three contain the search term.
const str = ['ocurrence 1, and..', 'ocurrence 2, and..', 'other phrases'];
// Join the phrases with an explicit separator argument (the tagged-template form
// `join ` `` only works by coercing an array to a string), then collect every
// case-insensitive match of the term.
const array = [...str.join(' ').matchAll(/ocurrence/gi)];
console.log(array.length); // 2
Edit: counting occurrences in one string:
// A single string that contains the search term twice.
const articleBody = 'ocurrence 1, and.. ocurrence 2, and... etc';
// Array.from drains the matchAll iterator, producing one entry per match.
const array = Array.from(articleBody.matchAll(/ocurrence/gi));
console.log(array.length); // 2
Edit 2:
Using a Web Worker (a dedicated worker, not a service worker)
I created a text of 3000 words ("Lorem ipsum" generator).
main.js
// The code below creates dedicated workers via the global `Worker` constructor,
// so feature-detect that constructor rather than `navigator.serviceWorker`,
// which is a separate API.
if (typeof Worker !== "undefined") {
  document.addEventListener("DOMContentLoaded", function () {
    const phrases = [
      { text: "Lorem ipsum" },
      { text: "elementum mattis" },
    ];
    // forEach rather than map: the callback is run only for its side effects.
    phrases.forEach((phrase, index) => {
      // One worker per phrase, so each search runs off the main thread.
      const phraseWorker = new Worker("phrase-match-sw.js");
      phraseWorker.onmessage = function (oEvent) {
        const { text, match } = oEvent.data;
        // Replace the entry with the counted result reported by the worker.
        phrases[index] = { text, match };
        // Only one message is sent to this worker, so it can be released
        // as soon as its reply has arrived.
        phraseWorker.terminate();
      };
      phraseWorker.onerror = function (errorEvent) {
        // Surface the failure and still release the worker.
        console.error("phrase worker failed:", errorEvent.message);
        phraseWorker.terminate();
      };
      phraseWorker.postMessage({
        article:
          "Lorem ipsum dolor sit amet consectetur adipiscing elit sapien orci, ligula leo consequat mus tellus elementum mattis lacus maecenas curabitur, ",
        phrase,
      });
    });
  });
}
phrase-match-sw.js
/**
 * Worker message handler: counts case-insensitive occurrences of a phrase
 * in an article and posts the phrase back with the count in `match`.
 *
 * Reads `event.data.article` (the text to search) and `event.data.phrase.text`
 * (the term to look for).
 */
globalThis.onmessage = function (event) {
  const { article, phrase } = event.data;
  // Escape regex metacharacters so the phrase is matched as literal text;
  // otherwise "a.b" would also match "axb" and "(x" would throw a SyntaxError.
  const literal = phrase.text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const match = [...article.matchAll(new RegExp(literal, "gi"))].length;
  // Post a copy instead of mutating the received phrase object.
  postMessage({ ...phrase, match });
};
Please take a look at this solution.
/**
 * Counts how many times `subString` occurs in `string`.
 *
 * Both arguments are coerced to strings by concatenation with "".
 *
 * @param {*} string - Text to search in.
 * @param {*} subString - Text to search for.
 * @param {boolean} [allowOverlapping] - When truthy, matches may overlap
 *   (the search resumes one character after each hit instead of after the hit).
 * @returns {number} The number of occurrences; for an empty `subString`
 *   this is the length of `string` plus one.
 */
function occurrences(string, subString, allowOverlapping) {
  const haystack = string + "";
  const needle = subString + "";
  if (needle.length <= 0) {
    return haystack.length + 1;
  }

  // Distance to advance after each hit before searching again.
  const stride = allowOverlapping ? 1 : needle.length;
  let count = 0;
  for (
    let at = haystack.indexOf(needle, 0);
    at >= 0;
    at = haystack.indexOf(needle, at + stride)
  ) {
    count += 1;
  }
  return count;
}
// Micro-benchmark: time the indexOf-based counter against RegExp-based counting
// on the same inputs. Scoped in a block so the helpers below do not leak.
{
  const sample = "foofoofoo";
  const terms = ["bar", "foo", "foofoo"]; // expected counts: 0, 3, 1

  console.time('occurrences');
  for (const term of terms) {
    occurrences(sample, term);
  }
  console.timeEnd('occurrences');

  console.time('RegExp');
  for (const term of terms) {
    // match() yields null when nothing matches, hence the empty-array fallback.
    (sample.match(new RegExp(term, 'gi')) || []).length;
  }
  console.timeEnd('RegExp');
}
This is the benchmark result: https://jsben.ch/aIqEP
I tried it on a 50 KB article body and benchmarked it against your usage. The results show that the solution below is 40% faster than your solution.
The benchmark link https://jsben.ch/wWW6e
I decreased the article size so that the benchmark could be saved; you can increase it for more representative results.
// Phrases to look for in `article`; matching is case-insensitive.
let phrases = [{ phrase: 'placerat', usage: 0 }, { phrase: 'lorem', usage: 0 }];
// Build one alternation regex from all phrases (metacharacters escaped so each
// phrase is matched literally), then tally the matches per lower-cased phrase.
// Result shape: { [phrase]: { count } } for every phrase that occurs at least once.
let matching = (
  article.match(
    new RegExp(
      phrases.map(({ phrase }) => phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|'),
      'gi',
    ),
  ) ?? [] // match() returns null when nothing matches
).reduce((acc, match) => {
  const key = match.toLowerCase();
  // Own-property check so inherited keys such as "constructor" are not mistaken
  // for existing tallies.
  if (Object.prototype.hasOwnProperty.call(acc, key)) {
    acc[key].count++;
  } else {
    // The first sighting counts as one occurrence.
    acc[key] = { count: 1 };
  }
  return acc;
}, {});