在JavaScript中计算关键字的最佳方法是什么?
时间:2020-03-06 14:48:02 来源:igfitidea点击:
在JavaScript中统计关键字出现次数的最好、最有效的方法是什么?基本上,我想传入一个字符串,并得到该字符串中出现次数最多的前N个单词或者短语,主要用于推荐标签。我想要的是概念性的提示或者指向实际示例的链接,而不一定是实际的代码;不过如果你愿意分享代码,我当然也不介意。如果有特定的函数能派上用场,也请告诉我,我将不胜感激。
目前,我的想法是先使用split()函数按空格拆分字符串,然后使用正则表达式清除标点符号。我还希望统计时不区分大小写。
解决方案
尝试将字符串分割成多个单词,然后对产生的单词进行计数,然后对计数进行排序。
清理完单词数组后,假设我们将其称为"wordArray":
// Tally how many times each word in wordArray occurs.
//
// The registry is created without a prototype so that it only ever contains
// the words we put into it. With a plain {} the word "hasOwnProperty" would
// overwrite the method used for the existence check (making the next
// iteration throw), and the word "__proto__" would never be counted.
var keywordRegistry = Object.create(null);

for (var i = 0; i < wordArray.length; i++) {
  var currentWord = wordArray[i];
  // A word that has not been seen yet reads as undefined, so start from 0.
  keywordRegistry[currentWord] = (keywordRegistry[currentWord] || 0) + 1;
}

// now keywordRegistry will have, as properties, all of the
// words in your word array with their respective counts

// this will alert (choose something better than alert) all words and their counts
for (var keyword in keywordRegistry) {
  alert("The keyword '" + keyword + "' occurred " + keywordRegistry[keyword] + " times");
}
那应该为我们提供完成这部分工作的基础知识。
剪切,粘贴并执行演示:
// Demo: report the most frequently used words in `text` via alert().
var text = "Text to be examined to determine which n words are used the most";

// Find 'em!
// A word is a run of word characters, optionally followed by an apostrophe
// and one or two more word characters (e.g. "don't", "they've").
var wordRegExp = /\w+(?:'\w{1,2})?/g;
// Prototype-less object: words such as "constructor" or "valueof" must not
// be mistaken for already-counted entries inherited from Object.prototype.
var words = Object.create(null);
var matches;
while ((matches = wordRegExp.exec(text)) !== null) {
  // Lower-case so that counting is case-insensitive.
  var word = matches[0].toLowerCase();
  words[word] = (words[word] || 0) + 1;
}

// Sort 'em!
// Build [word, count] pairs and order them by descending count.
var wordList = [];
for (var key in words) {
  wordList.push([key, words[key]]);
}
wordList.sort(function (a, b) {
  return b[1] - a[1];
});

// Come back any time, straaanger!
var n = 10;
// Never read past the end of wordList when the text has fewer than n
// distinct words.
var shown = Math.min(n, wordList.length);
var message = ["The top " + shown + " words are:"];
for (var i = 0; i < shown; i++) {
  var entry = wordList[i];
  message.push(entry[0] + " - " + entry[1] + " occurrence" + (entry[1] === 1 ? "" : "s"));
}
alert(message.join("\n"));
可重用功能:
/**
 * Returns the most frequently used words in a piece of text.
 *
 * A word is a run of word characters (letters, digits, underscore),
 * optionally followed by an apostrophe and one or two more word characters,
 * so "don't" and "they've" each count as one word. Words are lower-cased
 * before counting, which makes the count case-insensitive.
 *
 * @param {string} text Text to examine.
 * @param {number} n Maximum number of words to return.
 * @returns {string[]} Up to n lower-cased words ordered by descending
 *     frequency. Fewer than n words are returned when the text contains
 *     fewer distinct words.
 */
function getTopNWords(text, n) {
  var wordRegExp = /\w+(?:'\w{1,2})?/g;
  // Prototype-less object: words such as "constructor" or "__proto__" must
  // be counted like any other word instead of colliding with properties
  // inherited from Object.prototype.
  var counts = Object.create(null);
  var match;
  while ((match = wordRegExp.exec(text)) !== null) {
    var word = match[0].toLowerCase();
    counts[word] = (counts[word] || 0) + 1;
  }

  // Build [word, count] pairs and order them by descending count.
  var wordList = [];
  for (var key in counts) {
    wordList.push([key, counts[key]]);
  }
  wordList.sort(function (a, b) {
    return b[1] - a[1];
  });

  // Clamp to the number of distinct words so that asking for more words
  // than exist returns what is available instead of throwing.
  var limit = Math.min(n, wordList.length);
  var topWords = [];
  for (var i = 0; i < limit; i++) {
    topWords.push(wordList[i][0]);
  }
  return topWords;
}
我会完全按照你上面提到的方法来分离出每个单词。然后,我可能会把每个单词作为数组的键,并以它出现的次数作为对应的值。
例如:
// Keep the counts in a prototype-less object rather than an Array: on an
// Array the word "length" would change the array's length and words such as
// "push" would collide with inherited methods. The number of distinct words
// is available as Object.keys(a).length.
var a = Object.create(null);
// First occurrence reads as undefined (falsy) and starts the count at 1.
a[word] = a[word] ? a[word] + 1 : 1;
现在,我们就知道每个单词出现了多少次(a[word])。注意:以字符串为键的属性不会计入数组的length,因此独特单词的数量应通过Object.keys(a).length获取,而不是a.length。
这个版本基于insin先前的答案,但只用了一个循环:
/**
 * Returns the n most frequent words of a text together with their counts,
 * counting in a single pass over the words.
 *
 * The text is lower-cased and split on runs of non-word characters, so
 * counting is case-insensitive and punctuation is ignored.
 *
 * @param {string} text Text to examine.
 * @param {number} n Number of entries to return.
 * @returns {Array} Array of [word, count] pairs ordered by descending count,
 *     holding at most n pairs.
 */
function top_words(text, n) {
  // Split text on non word characters
  var words = text.toLowerCase().split(/\W+/);
  // Maps each word to the index of its [word, count] pair in word_counts.
  // Prototype-less object: with an Array (or a plain {}) words such as
  // "length", "push" or "constructor" would read inherited properties and be
  // mistaken for words that were already seen.
  var positions = Object.create(null);
  var word_counts = [];
  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    // split() yields empty strings when the text starts or ends with a
    // separator; those are not words.
    if (!word) {
      continue;
    }
    if (typeof positions[word] === 'undefined') {
      positions[word] = word_counts.length;
      word_counts.push([word, 1]);
    } else {
      word_counts[positions[word]][1]++;
    }
  }
  // Put most frequent words at the beginning.
  word_counts.sort(function (a, b) {
    return b[1] - a[1];
  });
  // Return the first n items
  return word_counts.slice(0, n);
}

// Let's see if it works.
var text = "Words in here are repeated. Are repeated, repeated!";
alert(top_words(text, 3));
该示例的结果是:[['repeated', 3], ['are', 2], ['words', 1]]