Split a string into an array of strings of 1-3 words each, depending on word length
You can express your rules as abbreviated regular expressions, build a real regex from them and apply it to your input:
// Sample input: words separated by whitespace and/or punctuation.
const text = "Lorem ipsum, dolor. sit amet? consectetur, adipiscing, elit! sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia bla?";

// Grouping rules in shorthand, tried in order:
//   S = short word (1-5 word characters), L = long word (6 or more word characters).
//   (?=L) requires a long word to follow without consuming it.
//   (.+) is the catch-all for whatever is left when none of the rules applies.
const rules = ['(SSS)', '(SS(?=L))', '(L(?=L))', '(SL)', '(LS)', '(.+)'];

// Expand the shorthand into a real global regex. Every word token also consumes
// the non-word characters (spaces, punctuation) that follow it, so each match
// keeps its trailing separators.
const regex = new RegExp(
  rules
    .join('|')
    .replace(/S/g, '\\w{1,5}\\W+')
    .replace(/L/g, '\\w{6,}\\W+'),
  'g'
);

console.log(text.match(regex));
If the rules don't change, the regex construction part is only needed once.
Note that this also handles punctuation in a reasonable way.
One option is to first create an array of rules, like:
const rules = [
  // Entry layout: [how many words to take when the rule applies,
  //                condition on word 1, condition on word 2, ...]
  // 'less' means the word is shorter than 6 characters, 'eqmore' means 6 or more.

  // short, short, short -> take all three words
  [3, 'less', 'less', 'less'],

  // short, short, long -> take only the two short words
  [2, 'less', 'less', 'eqmore'],

  // long, long -> take the first long word on its own
  [1, 'eqmore', 'eqmore'],

  // long, short -> take both
  [2, 'eqmore', 'less'],

  // short, long -> take both
  [2, 'less', 'eqmore'],
];
Then, while words remain, find the first rule that matches the upcoming words, take the number of words that rule specifies, splice them off the front of the word list, and push them (joined by a space) to the output array:
// Each rule: [number of words to take, condition on word 1, condition on word 2, ...].
const rules = [
  [3, 'less', 'less', 'less'],
  [2, 'less', 'less', 'eqmore'],
  [1, 'eqmore', 'eqmore'],
  [2, 'eqmore', 'less'],
  [2, 'less', 'eqmore'],
];

const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(' ');
const output = [];

// 'less' -> word shorter than 6 characters; any other condition -> 6 characters or more.
const verify = (cond, word) => {
  if (cond === 'less') {
    return word.length < 6;
  }
  return word.length >= 6;
};

// Consume the word list from the front, one matched rule at a time.
while (words.length > 0) {
  // First rule whose conditions all hold for the upcoming words.
  const matchingRule = rules.find((rule) => {
    const conditions = rule.slice(1);
    return conditions.every((condition, offset) => verify(condition, words[offset]));
  });
  // A rule is expected to match here; destructuring throws if none did.
  const [takeCount] = matchingRule;
  const group = words.splice(0, takeCount);
  output.push(group.join(' '));
}

console.log(output);
Of course, the `.find` call assumes that every input string will always have a matching rule at each position being spliced; if no rule matches, the loop throws a `TypeError`.
For the additional rule that any words not matched by the previous rules should simply be added to the output one at a time, put `[1]` at the bottom of the `rules` array:
// Each rule: [number of words to take, condition on word 1, condition on word 2, ...].
// The trailing [1] has no conditions, so it always applies and takes a single word.
const rules = [
  [3, 'less', 'less', 'less'],
  [2, 'less', 'less', 'eqmore'],
  [1, 'eqmore', 'eqmore'],
  [2, 'eqmore', 'less'],
  [2, 'less', 'eqmore'],
  [1],
];

const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(' ');
const output = [];

// 'less' -> word shorter than 6 characters; any other condition -> 6 characters or more.
const verify = (cond, word) => {
  const isShort = word.length < 6;
  return cond === 'less' ? isShort : !isShort;
};

// True when every condition holds for the upcoming words; a missing (or empty)
// word at a required position makes the rule fail instead of throwing.
const ruleApplies = (conditions) =>
  conditions.every((condition, offset) => {
    const candidate = words[offset];
    return Boolean(candidate) && verify(condition, candidate);
  });

while (words.length > 0) {
  const applicableRule = rules.find((rule) => ruleApplies(rule.slice(1)));
  const [takeCount] = applicableRule;
  const group = words.splice(0, takeCount);
  output.push(group.join(' '));
}

console.log(output);
If we define words with length <6 to have size 1 and >=6 to have size 2, we can rewrite the rules to "if the next word would make the total size of the current row >= 4, start next line".
/**
 * Size of a word for row-filling purposes.
 *
 * @param {string} word - The word to measure.
 * @returns {number} 1 for words shorter than 6 characters, 2 otherwise.
 */
function wordSize(word) {
  return word.length < 6 ? 1 : 2;
}

const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusd tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(" ");

// Completed rows, each an array of words.
const rows = [];
// Row currently being filled, plus its running size so it is not re-summed per word.
let row = [];
let rowSize = 0;

for (const word of words) {
  const size = wordSize(word);
  // Start a new row when adding this word would bring the row's size to 4 or more.
  if (rowSize + size >= 4) {
    rows.push(row);
    row = [];
    rowSize = 0;
  }
  row.push(word);
  rowSize += size;
}
// Flush the last, still-open row.
rows.push(row);

const result = rows.map((rowWords) => rowWords.join(" "));
console.log(result);