按“Levenshtein 距离"对数组进行排序在 Javascript 中具有最佳性能 [英] Sort an array by the "Levenshtein Distance" with best performance in Javascript

查看:26
本文介绍了按“Levenshtein 距离"对数组进行排序在 Javascript 中具有最佳性能的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

所以我有一个随机的 javascript 名称数组...

[@larry,@nicholas,@notch] 等

它们都以@ 符号开头.我想按 Levenshtein 距离对它们进行排序,以便列表顶部的那些最接近搜索词.目前,我有一些 javascript 使用 jQuery 的 .grep() 在它上面使用 javascript .match() 方法围绕按键输入的搜索词:

(自首次发布后编辑的代码)

limitArr = $.grep(imTheCallback, function(n){返回 n.match(searchy.toLowerCase())});modArr = limitArr.sort(levenshtein(searchy.toLowerCase(), 50))如果 (modArr[0].substr(0, 1) == '@') {如果 (atRes.childred('div').length <6) {modArr.forEach(函数(i){atRes.append('<div class="oneResult">' + i + '</div>');});}} else if (modArr[0].substr(0, 1) == '#') {if (tagRes.children('div').length <6) {modArr.forEach(函数(i){tagRes.append('<div class="oneResult">' + i + '</div>');});}}$('.oneResult:first-child').addClass('active');$('.oneResult').click(function(){window.location.href = 'http://hashtag.ly/' + $(this).html();});

它还包含一些 if 语句,用于检测数组是否包含主题标签 (#) 或提及 (@).忽略那个.imTheCallback 是名称数组,可以是主题标签或提及,然后 modArr 是已排序的数组.然后 .atResults.tagResults 元素是它每次在数组中附加的元素,这形成了基于输入的搜索词的名称列表.>

有 Levenshtein 距离算法:

var levenshtein = function(min, split) {//重新审视 Levenshtein 算法 - WebReflection尝试 {拆分 = !("0")[0]}赶上(我){分裂 = 真};返回函数(a,b){如果 (a == b)返回0;if (!a.length || !b.length)返回 b.length ||a.长度;如果(拆分){a = a.split("");b = b.split("")};var len1 = a.length + 1,len2 = b.length + 1,我 = 0,我 = 0,d = [[0]],c,j,J;而 (++i < len2)d[0][i] = i;我 = 0;而 (++i < len1) {J = j = 0;c = a[I];d[i] = [i];while(++j 

如何在我当前的代码中使用算法(或类似的算法)来对其进行排序而不影响性能?

更新:

所以我现在使用 James Westgate 的 Lev Dist 函数.工作 WAYYYY 快.所以性能解决了,现在的问题是使用它与源...

modArr = limitArr.sort(function(a, b){levDist(a, 搜索)levDist(b, 搜索)});

我现在的问题是对使用 .sort() 方法的一般理解.感谢帮助,谢谢.

谢谢!

解决方案

几年前我编写了一个内联拼写检查器并实现了 Levenshtein 算法——因为它是内联的,而且对于 IE8 我做了很多性能优化.

var levDist = function(s, t) {var d = [];//二维矩阵//步骤1var n = s.length;var m = t.length;如果 (n == 0) 返回 m;如果(m == 0)返回n;//在javascript中创建一个数组数组(降序循环更快)对于 (var i = n; i >= 0; i--) d[i] = [];//第2步对于 (var i = n; i >= 0; i--) d[i][0] = i;对于 (var j = m; j >= 0; j--) d[0][j] = j;//第 3 步for (var i = 1; i <= n; i++) {var s_i = s.charAt(i - 1);//步骤4for (var j = 1; j <= m; j++) {//检查到目前为止锯齿状的ld总数如果 (i == j && d[i][j] > 4) 返回 n;var t_j = t.charAt(j - 1);无功成本 = (s_i == t_j) ?0 : 1;//第五步//计算最小值var mi = d[i - 1][j] + 1;var b = d[i][j - 1] + 1;var c = d[i - 1][j - 1] + 成本;如果 (b < mi) mi = b;如果 (c 

So I have a random javascript array of names...

[@larry,@nicholas,@notch] etc.

They all start with the @ symbol. I'd like to sort them by the Levenshtein Distance so that the the ones at the top of the list are closest to the search term. At the moment, I have some javascript that uses jQuery's .grep() on it using javascript .match() method around the entered search term on key press:

(code edited since first publish)

limitArr = $.grep(imTheCallback, function(n){
    return n.match(searchy.toLowerCase())
});
modArr = limitArr.sort(levenshtein(searchy.toLowerCase(), 50))
if (modArr[0].substr(0, 1) == '@') {
    if (atRes.childred('div').length < 6) {
        modArr.forEach(function(i){
            atRes.append('<div class="oneResult">' + i + '</div>');
        });
    }
} else if (modArr[0].substr(0, 1) == '#') {
    if (tagRes.children('div').length < 6) {
        modArr.forEach(function(i){
            tagRes.append('<div class="oneResult">' + i + '</div>');
        });
    }
}

$('.oneResult:first-child').addClass('active');

$('.oneResult').click(function(){
    window.location.href = 'http://hashtag.ly/' + $(this).html();
});

It also has some if statements detecting if the array contains hashtags (#) or mentions (@). Ignore that. The imTheCallback is the array of names, either hashtags or mentions, then modArr is the array sorted. Then the .atResults and .tagResults elements are the elements that it appends each time in the array to, this forms a list of names based on the entered search terms.

I also have the Levenshtein Distance algorithm:

var levenshtein = function(min, split) {
    // Levenshtein Algorithm Revisited - WebReflection
    try {
        split = !("0")[0]
    } catch(i) {
        split = true
    };

    return function(a, b) {
        if (a == b)
            return 0;
        if (!a.length || !b.length)
            return b.length || a.length;
        if (split) {
            a = a.split("");
            b = b.split("")
        };
        var len1 = a.length + 1,
            len2 = b.length + 1,
            I = 0,
            i = 0,
            d = [[0]],
            c, j, J;
        while (++i < len2)
            d[0][i] = i;
        i = 0;
        while (++i < len1) {
            J = j = 0;
            c = a[I];
            d[i] = [i];
            while(++j < len2) {
                d[i][j] = min(d[I][j] + 1, d[i][J] + 1, d[I][J] + (c != b[J]));
                ++J;
            };
            ++I;
        };
        return d[len1 - 1][len2 - 1];
    }
}(Math.min, false);

How can I work with algorithm (or a similar one) into my current code to sort it without bad performance?

UPDATE:

So I'm now using James Westgate's Lev Dist function. Works WAYYYY fast. So performance is solved, the issue now is using it with source...

modArr = limitArr.sort(function(a, b){
    levDist(a, searchy)
    levDist(b, searchy)
});

My problem now is general understanding on using the .sort() method. Help is appreciated, thanks.

Thanks!

解决方案

I wrote an inline spell checker a few years ago and implemented a Levenshtein algorithm - since it was inline and for IE8 I did quite a lot of performance optimisation.

var levDist = function(s, t) {
    var d = []; //2d matrix

    // Step 1
    var n = s.length;
    var m = t.length;

    if (n == 0) return m;
    if (m == 0) return n;

    //Create an array of arrays in javascript (a descending loop is quicker)
    for (var i = n; i >= 0; i--) d[i] = [];

    // Step 2
    for (var i = n; i >= 0; i--) d[i][0] = i;
    for (var j = m; j >= 0; j--) d[0][j] = j;

    // Step 3
    for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);

        // Step 4
        for (var j = 1; j <= m; j++) {

            //Check the jagged ld total so far
            if (i == j && d[i][j] > 4) return n;

            var t_j = t.charAt(j - 1);
            var cost = (s_i == t_j) ? 0 : 1; // Step 5

            //Calculate the minimum
            var mi = d[i - 1][j] + 1;
            var b = d[i][j - 1] + 1;
            var c = d[i - 1][j - 1] + cost;

            if (b < mi) mi = b;
            if (c < mi) mi = c;

            d[i][j] = mi; // Step 6

            //Damerau transposition
            if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
                d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }

    // Step 7
    return d[n][m];
}

这篇关于按“Levenshtein 距离"对数组进行排序在 Javascript 中具有最佳性能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆