Currently, I am in the process of creating a tool for my client that organizes keywords into groups based on similarities within the top 10 Google search URLs. Each keyword is represented as a JavaScript object containing a list of URLs. The condition for grouping two keywords together is if they share 3 or more common URLs. Furthermore, there should be no duplicates in the generated groups, and the total number of groups created is not predetermined beforehand. Any insights on refining the logic behind this problem would be greatly appreciated!
While I have crafted an algorithm below to tackle this issue, it still results in duplicates and fails to accurately group certain keywords together.
function makeKeywordGroupsNew(results: Result[], uid: string): Group[] {
let dataset = results;
let groups: any[] = [];
// iterating through all records in the dataset
dataset.forEach((current: Result) => {
// initializing the group with the current keyword
const group = { volume: 0, items: [current] };
// removing current keyword from dataset
dataset = dataset.filter(el => el.keyword !== current.keyword);
// comparing current keyword with others to determine shared URLs
dataset.forEach((other: Result) => {
const urlsInCommon = _.intersection(current.urls, other.urls);
if (urlsInCommon.length >= 3) {
group.items.push(other);
}
});
// calculating group volume - extraneous to core logic
group.volume = _.sum(group.items.map(item => item.volume));
// sorting keywords by volume - extraneous to core logic
group.items = group.items
.sort((a, b) => {
if (a.volume < b.volume) return 1;
if (a.volume > b.volume) return -1;
return 0;
})
.map(el => el.keyword);
// adding newly formed group to result array
groups.push(group);
});
// filtering out single keyword groups
groups = groups.filter(group => group.items.length > 1);
// removing duplicate keywords in each group
groups = groups.map(group => ({ ...group, items: _.uniq(group.items) }));
return groups.map(group => ({
uid,
main: group.items[0],
keywords: group.items.slice(1, group.length),
volume: group.volume
}));
}
I was anticipating the output from input.json to align with output.csv, but my solution either undergroups or misclassifies keywords.