比较字符串 Javascript 返回可能的百分比

Question

提问by Brad Ruderman

I am looking for a JavaScript function that can compare two strings and return the likeliness that they are alike. I have looked at soundex but that's not really great for multi-word strings or non-names. I am looking for a function like:

我正在寻找一个 JavaScript 函数，它可以比较两个字符串并返回它们相似的可能性。我看过 soundex 但这对于多字串或非名称并不是很好。我正在寻找这样的功能：

function compare(strA,strB){

}

compare("Apples","apple") = Some X Percentage.

The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?

该函数适用于所有类型的字符串，包括数字、多字值和名称。也许我可以使用一个简单的算法？

Ultimately none of these served my purpose so I used this:

 function compare(c, u) {
        var incept = false;
        var ca = c.split(",");
        u = clean(u);
        //ca = correct answer array (Collection of all correct answer)
        //caa = a single correct answer word array (collection of words of a single correct answer)
        //u = array of user answer words cleaned using custom clean function
        for (var z = 0; z < ca.length; z++) {
            caa = $.trim(ca[z]).split(" ");
            var pc = 0;
            for (var x = 0; x < caa.length; x++) {
                for (var y = 0; y < u.length; y++) {
                    if (soundex(u[y]) != null && soundex(caa[x]) != null) {
                        if (soundex(u[y]) == soundex(caa[x])) {
                            pc = pc + 1;
                        }
                    }
                    else {
                        if (u[y].indexOf(caa[x]) > -1) {
                            pc = pc + 1;
                        }
                    }
                }
            }
            if ((pc / caa.length) > 0.5) {
                return true;
            }
        }
        return false;
    }

    // create object listing the SOUNDEX values for each letter
    // -1 indicates that the letter is not coded, but is used for coding
    //  0 indicates that the letter is omitted for modern census archives
    //                              but acts like -1 for older census archives
    //  1 is for BFPV
    //  2 is for CGJKQSXZ
    //  3 is for DT
    //  4 is for L
    //  5 is for MN my home state
    //  6 is for R
    function makesoundex() {
        this.a = -1
        this.b = 1
        this.c = 2
        this.d = 3
        this.e = -1
        this.f = 1
        this.g = 2
        this.h = 0
        this.i = -1
        this.j = 2
        this.k = 2
        this.l = 4
        this.m = 5
        this.n = 5
        this.o = -1
        this.p = 1
        this.q = 2
        this.r = 6
        this.s = 2
        this.t = 3
        this.u = -1
        this.v = 1
        this.w = 0
        this.x = 2
        this.y = -1
        this.z = 2
    }

    var sndx = new makesoundex()

    // check to see that the input is valid
    function isSurname(name) {
        if (name == "" || name == null) {
            return false
        } else {
            for (var i = 0; i < name.length; i++) {
                var letter = name.charAt(i)
                if (!(letter >= 'a' && letter <= 'z' || letter >= 'A' && letter <= 'Z')) {
                    return false
                }
            }
        }
        return true
    }

    // Collapse out directly adjacent sounds
    // 1. Assume that surname.length>=1
    // 2. Assume that surname contains only lowercase letters
    function collapse(surname) {
        if (surname.length == 1) {
            return surname
        }
        var right = collapse(surname.substring(1, surname.length))
        if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) {
            return surname.charAt(0) + right.substring(1, right.length)
        }
        return surname.charAt(0) + right
    }

    // Collapse out directly adjacent sounds using the new National Archives method
    // 1. Assume that surname.length>=1
    // 2. Assume that surname contains only lowercase letters
    // 3. H and W are completely ignored
    function omit(surname) {
        if (surname.length == 1) {
            return surname
        }
        var right = omit(surname.substring(1, surname.length))
        if (!sndx[right.charAt(0)]) {
            return surname.charAt(0) + right.substring(1, right.length)
        }
        return surname.charAt(0) + right
    }

    // Output the coded sequence
    function output_sequence(seq) {
        var output = seq.charAt(0).toUpperCase() // Retain first letter
        output += "-" // Separate letter with a dash
        var stage2 = seq.substring(1, seq.length)
        var count = 0
        for (var i = 0; i < stage2.length && count < 3; i++) {
            if (sndx[stage2.charAt(i)] > 0) {
                output += sndx[stage2.charAt(i)]
                count++
            }
        }
        for (; count < 3; count++) {
            output += "0"
        }
        return output
    }

    // Compute the SOUNDEX code for the surname
    function soundex(value) {
        if (!isSurname(value)) {
            return null
        }
        var stage1 = collapse(value.toLowerCase())
        //form.result.value=output_sequence(stage1);

        var stage1 = omit(value.toLowerCase())
        var stage2 = collapse(stage1)
        return output_sequence(stage2);

    }

    function clean(u) {
        var u = u.replace(/\,/g, "");
        u = u.toLowerCase().split(" ");
        var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
        var n = [];
        for (var y = 0; y < u.length; y++) {
            var test = false;
            for (var z = 0; z < cw.length; z++) {
                if (u[y] != "" && u[y] != cw[z]) {
                    test = true;
                    break;
                }
            }
            if (test) {
    //Don't use & or $ in comparison
                var val = u[y].replace("$", "").replace("&", "");
                n.push(val);
            }
        }
        return n;
    }

Answer 1

回答by overlord1234

Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance

这是基于 Levenshtein 距离的答案https://en.wikipedia.org/wiki/Levenshtein_distance

function similarity(s1, s2) {
  var longer = s1;
  var shorter = s2;
  if (s1.length < s2.length) {
    longer = s2;
    shorter = s1;
  }
  var longerLength = longer.length;
  if (longerLength == 0) {
    return 1.0;
  }
  return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
}

For calculating edit distance

用于计算编辑距离

function editDistance(s1, s2) {
  s1 = s1.toLowerCase();
  s2 = s2.toLowerCase();

  var costs = new Array();
  for (var i = 0; i <= s1.length; i++) {
    var lastValue = i;
    for (var j = 0; j <= s2.length; j++) {
      if (i == 0)
        costs[j] = j;
      else {
        if (j > 0) {
          var newValue = costs[j - 1];
          if (s1.charAt(i - 1) != s2.charAt(j - 1))
            newValue = Math.min(Math.min(newValue, lastValue),
              costs[j]) + 1;
          costs[j - 1] = lastValue;
          lastValue = newValue;
        }
      }
    }
    if (i > 0)
      costs[s2.length] = lastValue;
  }
  return costs[s2.length];
}

Usage

用法

similarity('Stack Overflow','Stack Ovrflw')

returns 0.8571428571428571

返回 0.8571428571428571

You can play with it below:

你可以在下面玩它：

function checkSimilarity(){
  var str1 = document.getElementById("lhsInput").value;
  var str2 = document.getElementById("rhsInput").value;
  document.getElementById("output").innerHTML = similarity(str1, str2);
}

function similarity(s1, s2) {
      var longer = s1;
      var shorter = s2;
      if (s1.length < s2.length) {
        longer = s2;
        shorter = s1;
      }
      var longerLength = longer.length;
      if (longerLength == 0) {
        return 1.0;
      }
      return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
    }

    function editDistance(s1, s2) {
      s1 = s1.toLowerCase();
      s2 = s2.toLowerCase();

      var costs = new Array();
      for (var i = 0; i <= s1.length; i++) {
        var lastValue = i;
        for (var j = 0; j <= s2.length; j++) {
          if (i == 0)
            costs[j] = j;
          else {
            if (j > 0) {
              var newValue = costs[j - 1];
              if (s1.charAt(i - 1) != s2.charAt(j - 1))
                newValue = Math.min(Math.min(newValue, lastValue),
                  costs[j]) + 1;
              costs[j - 1] = lastValue;
              lastValue = newValue;
            }
          }
        }
        if (i > 0)
          costs[s2.length] = lastValue;
      }
      return costs[s2.length];
    }

<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>
<div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>
<div>Match: <span id="output">No Input</span></div>

Answer 2

回答by jmort253

Here is a very simple function that does a comparison and returns a percentage based on equivalency. While it has not been tested for all possible scenarios, it may help you get started.

这是一个非常简单的函数，它进行比较并返回基于等价的百分比。虽然它尚未针对所有可能的情况进行测试，但它可能会帮助您入门。

function similar(a,b) {
    var lengthA = a.length;
    var lengthB = b.length;
    var equivalency = 0;
    var minLength = (a.length > b.length) ? b.length : a.length;    
    var maxLength = (a.length < b.length) ? b.length : a.length;    
    for(var i = 0; i < minLength; i++) {
        if(a[i] == b[i]) {
            equivalency++;
        }
    }


    var weight = equivalency / maxLength;
    return (weight * 100) + "%";
}
alert(similar("test","tes"));   // 75%
alert(similar("test","test"));  // 100%
alert(similar("test","testt")); // 80%
alert(similar("test","tess"));  // 75%

Answer 3

回答by Paul

Just one I quickly wrote that might be good enough for your purposes:

只是我很快写的一个可能足以满足您的目的：

function Compare(strA,strB){
    for(var result = 0, i = strA.length; i--;){
        if(typeof strB[i] == 'undefined' || strA[i] == strB[i]);
        else if(strA[i].toLowerCase() == strB[i].toLowerCase())
            result++;
        else
            result += 4;
    }
    return 1 - (result + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length));
}

This weighs characters that are the same but different case 1 quarter as heavily as characters that are completely different or missing. It returns a number between 0 and 1, 1 meaning the strings are identical. 0 meaning they have no similarities. Examples:

这对相同但大小写不同的字符的权重与完全不同或缺失的字符的权重相同。它返回一个介于 0 和 1 之间的数字，1 表示字符串相同。0 表示它们没有相似之处。例子：

Compare("Apple", "Apple")    // 1
Compare("Apples", "Apple")   // 0.8181818181818181
Compare("Apples", "apple")   // 0.7727272727272727
Compare("a", "A")            // 0.75
Compare("Apples", "appppp")  // 0.45833333333333337
Compare("a", "b")            // 0

Answer 4

回答by VisioN

How about function similar_textfrom PHP.js library?

如何函数similar_text从PHP.js库？

It is based on a PHP function with the same name.

它基于一个同名的 PHP 函数。

function similar_text (first, second) {
    // Calculates the similarity between two strings  
    // discuss at: http://phpjs.org/functions/similar_text

    if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') {
        return 0;
    }

    first += '';
    second += '';

    var pos1 = 0,
        pos2 = 0,
        max = 0,
        firstLength = first.length,
        secondLength = second.length,
        p, q, l, sum;

    max = 0;

    for (p = 0; p < firstLength; p++) {
        for (q = 0; q < secondLength; q++) {
            for (l = 0;
            (p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++);
            if (l > max) {
                max = l;
                pos1 = p;
                pos2 = q;
            }
        }
    }

    sum = max;

    if (sum) {
        if (pos1 && pos2) {
            sum += this.similar_text(first.substr(0, pos2), second.substr(0, pos2));
        }

        if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) {
            sum += this.similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max));
        }
    }

    return sum;
}

Answer 5

回答by Tushar Walzade

Using thislibrary for string similarity worked like a charm for me!

将这个库用于字符串相似性对我来说就像一个魅力！

Here's the Example -

这是示例 -

var similarity = stringSimilarity.compareTwoStrings("Apples","apple");    // => 0.88

Answer 6

回答by Octavio Díaz

fuzzyset- A fuzzy string set for javascript. fuzzyset is a data structure that performs something akin to fulltext search against data to determine likely mispellings and approximate string matching. Note that this is a javascript port of a python library.

Fuzzyset- 为 javascript 设置的模糊字符串。模糊集是一种数据结构，它对数据执行类似于全文搜索的操作，以确定可能的拼写错误和近似字符串匹配。请注意，这是一个 python 库的 javascript 端口。

Answer 7

回答by Leon Grin

To Find degree of similarity between two strings; we can use more than one or two methods but I am mostly inclined towards the usage of 'Dice's Coefficient'. which is better! well in my knowledge than using 'Levenshtein distance'

求两个字符串的相似度；我们可以使用不止一种或两种方法，但我最倾向于使用“骰子系数”。哪个更好！据我所知，比使用“ Levenshtein 距离”好

Using this 'string-similarity' package from npm you will be able to work on what I said above.

使用npm 中的这个“字符串相似性”包，您将能够处理我上面所说的内容。

some easy usage examples are

一些简单的使用示例是

var stringSimilarity = require('string-similarity');

var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed'); 

var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);

for more please visit the link given above. Thankyou.

有关更多信息，请访问上面给出的链接。谢谢你。

比较字符串 Javascript 返回可能的百分比

提问by Brad Ruderman

回答by overlord1234

回答by jmort253

回答by Paul

回答by VisioN

回答by Tushar Walzade

回答by Octavio Díaz

回答by Leon Grin

相关推荐

最近更新

标签

比较字符串 Javascript 返回可能的百分比

提问by Brad Ruderman

回答by overlord1234

回答by jmort253

回答by Paul

回答by VisioN

回答by Tushar Walzade

回答by Octavio Díaz

回答by Leon Grin

相关推荐

如何在 JavaScript 中检测 HTMLCollection/NodeList？

javascript 中的双重否定 (!!) - 目的是什么？

Javascript“onClick”事件不起作用

简单的 div onclick 显示 javascript

相关推荐

最近更新

标签