替换字符串中多个字符的最快方法? [英] Replacing multiple characters in a string, the fastest way?

查看:167
本文介绍了在一个字符串,以最快的方式更换多个字符?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在把一批包含多个字符串字段的记录从旧数据库导入到新数据库。这个过程似乎非常慢,我怀疑是因为我这样做:

 foreach (var oldObj in oldDB)
{
    // Restored from the machine-translated text: every string field is trimmed and
    // then nine ASCII characters are mapped to accented letters, one chained
    // String.Replace call per character.
    NewObject newObj = new NewObject();
    newObj.Name = oldObj.Name.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Surname = oldObj.Surname.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Address = oldObj.Address.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    newObj.Note = oldObj.Note.Trim().Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
        .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
        .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
    /*
    ... some processing ...
    */
}

我在网上读过一些帖子和文章,看到了许多不同的看法。有人说用带 MatchEvaluator 的正则表达式更好,也有人说最好保持原样。



虽然我自己做一个基准测试可能更容易,但我还是决定在这里提问,以防别人也有同样的疑问,或者有人早已知道答案。



那么,在 C# 中做这件事的最快方法是什么?



修改



我已经在此处发布了基准测试。乍一看,理查德的方法似乎是最快的。然而,由于正则表达式模式有误,他的方法和马克的方法实际上都不会做任何替换。把模式从

@"\^@\[\]`\}~\{\\"

纠正为

@"\^|@|\[|\]|`|\}|~|\{|\\"

之后,看起来用链式 .Replace() 调用的老办法终究才是最快的。

解决方案

感谢各位的回答。
我写了一个简单粗糙的基准测试来检验大家的方案。我用 500,000 次迭代测试了 4 个字符串的处理,并且跑了 4 遍。结果如下:

 
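*** Pass 1
Old (Chained String.Replace()) way completed in 814 ms
logicnp (ToCharArray) way completed in 916 ms
oleksii (StringBuilder) way completed in 943 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2551 ms
Richard (Regex w/ MatchEvaluator) way completed in 215 ms
Marc Gravell (Static Regex) way completed in 1008 ms

*** Pass 2
Old (Chained String.Replace()) way completed in 786 ms
logicnp (ToCharArray) way completed in 920 ms
oleksii (StringBuilder) way completed in 905 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2515 ms
Richard (Regex w/ MatchEvaluator) way completed in 217 ms
Marc Gravell (Static Regex) way completed in 1025 ms

*** Pass 3
Old (Chained String.Replace()) way completed in 775 ms
logicnp (ToCharArray) way completed in 903 ms
oleksii (StringBuilder) way completed in 931 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2529 ms
Richard (Regex w/ MatchEvaluator) way completed in 214 ms
Marc Gravell (Static Regex) way completed in 1022 ms

*** Pass 4
Old (Chained String.Replace()) way completed in 799 ms
logicnp (ToCharArray) way completed in 908 ms
oleksii (StringBuilder) way completed in 938 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2592 ms
Richard (Regex w/ MatchEvaluator) way completed in 225 ms
Marc Gravell (Static Regex) way completed in 1050 ms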

这个基准测试的代码如下。请检查代码,并确认 @Richard 的方法确实是最快的。注意,我没有检查输出是否正确,只是假定它们是正确的。



 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Text.RegularExpressions;

namespace StringReplaceTest
{
    /// <summary>
    /// Micro-benchmark comparing several ways of replacing a fixed set of single
    /// characters in four short strings. (Restored to valid C# from a
    /// machine-translated copy of the code.)
    /// </summary>
    class Program
    {
        /// <summary>Timed loop iterations per approach; every iteration processes all four inputs.</summary>
        const int Iterations = 500000;

        static string test1 = "A^@[BCD";
        static string test2 = "E]FGH\\";
        static string test3 = "ijk`l}m";
        static string test4 = "nopq~{r";

        // Replacement table (char -> string) used by the static-regex approach.
        static readonly Dictionary<char, string> repl =
            new Dictionary<char, string>
            {
                {'^', "Č"}, {'@', "Ž"}, {'[', "Š"}, {']', "Ć"}, {'`', "ž"}, {'}', "ć"}, {'~', "č"}, {'{', "š"}, {'\\', "Đ"}
            };

        // Same mapping as char -> char, used by the ToCharArray and Aggregate approaches.
        static readonly Dictionary<char, char> replacements =
            new Dictionary<char, char>
            {
                {'^', 'Č'}, {'@', 'Ž'}, {'[', 'Š'}, {']', 'Ć'}, {'`', 'ž'}, {'}', 'ć'}, {'~', 'č'}, {'{', 'š'}, {'\\', 'Đ'}
            };

        // Alternation pattern and evaluator for the MatchEvaluator approach; created once,
        // so that constructing the Regex is not part of the timed region.
        static readonly Regex richardRegex = new Regex(@"\^|@|\[|\]|`|\}|~|\{|\\");
        static readonly MatchEvaluator richardEvaluator = MapMatch;

        static readonly Regex replaceRegex;

        static Program() // static initializer
        {
            // Build a single character class that contains every key of repl.
            StringBuilder pattern = new StringBuilder().Append('[');
            foreach (var key in repl.Keys)
            {
                pattern.Append(EscapeForCharacterClass(key));
            }
            pattern.Append(']');
            replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
        }

        /// <summary>
        /// Escapes one character so that it is a literal inside a regex character class.
        /// </summary>
        /// <param name="c">The character to escape.</param>
        /// <returns>The pattern fragment that matches exactly <paramref name="c"/>.</returns>
        static string EscapeForCharacterClass(char c)
        {
            // Regex.Escape leaves ']' and '-' untouched. Inside [...] an unescaped ']'
            // closes the class early, which turned the rest of the keys into a literal
            // sequence that never matched; '-' would be read as a range operator.
            if (c == ']' || c == '-')
            {
                return "\\" + c;
            }
            return Regex.Escape(c.ToString());
        }

        /// <summary>Replaces every character that is a key of the replacement table, using the precompiled static regex.</summary>
        /// <param name="input">The text to process.</param>
        /// <returns>The text with all mapped characters replaced.</returns>
        public static string Sanitize(string input)
        {
            return replaceRegex.Replace(input, match => repl[match.Value[0]]);
        }

        /// <summary>Applies the nine replacements with chained StringBuilder.Replace calls.</summary>
        static string DoGeneralReplace(string input)
        {
            var sb = new StringBuilder(input);
            return sb.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ').ToString();
        }

        /// <summary>Applies the nine replacements with chained String.Replace calls (the "old way").</summary>
        static string ChainedReplace(string input)
        {
            return input.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        }

        //Method for replacing chars with a mapping
        static string Replace(string input, IDictionary<char, char> replacementMap)
        {
            return replacementMap.Keys
                .Aggregate(input, (current, oldChar)
                    => current.Replace(oldChar, replacementMap[oldChar]));
        }

        /// <summary>Copies the input to a char array and replaces mapped characters in place.</summary>
        static string ReplaceViaCharArray(string input, Dictionary<char, char> replacementMap)
        {
            char[] chars = input.ToCharArray();
            for (int i = 0; i < chars.Length; i++)
            {
                char newChar;
                if (replacementMap.TryGetValue(chars[i], out newChar))
                {
                    chars[i] = newChar;
                }
            }
            return new string(chars);
        }

        /// <summary>Match evaluator for the alternation regex: maps the matched character to its replacement.</summary>
        static string MapMatch(Match match)
        {
            switch (match.Value)
            {
                case "^": return "Č";
                case "@": return "Ž";
                case "[": return "Š";
                case "]": return "Ć";
                case "`": return "ž";
                case "}": return "ć";
                case "~": return "č";
                case "{": return "š";
                case "\\": return "Đ";
                default: throw new InvalidOperationException("Unexpected match!");
            }
        }

        /// <summary>Runs <paramref name="transform"/> over the four test strings <see cref="Iterations"/> times.</summary>
        /// <returns>Elapsed wall-clock time in milliseconds.</returns>
        static long Time(Func<string, string> transform)
        {
            Stopwatch sw = Stopwatch.StartNew();
            for (int idx = 0; idx < Iterations; idx++)
            {
                transform(test1);
                transform(test2);
                transform(test3);
                transform(test4);
            }
            sw.Stop();
            return sw.ElapsedMilliseconds;
        }

        /// <summary>Checks one approach against the chained-Replace result, then times it and prints the result line.</summary>
        /// <param name="label">Name printed in front of " way completed in".</param>
        /// <param name="transform">The replacement approach under test.</param>
        /// <param name="suffix">Text printed after the millisecond count.</param>
        static void Run(string label, Func<string, string> transform, string suffix)
        {
            // Sanity check outside the timed region: an approach that silently does
            // nothing would otherwise look fast. Prints only when results disagree.
            foreach (string input in new[] { test1, test2, test3, test4 })
            {
                if (transform(input) != ChainedReplace(input))
                {
                    Console.WriteLine("WARNING: " + label + " produced an unexpected result for \"" + input + "\"");
                }
            }
            Console.WriteLine(label + " way completed in " + Time(transform).ToString() + suffix);
        }

        static void Main(string[] args)
        {
            for (int i = 1; i < 5; i++)
            {
                DoIt(i);
            }
        }

        /// <summary>Executes one benchmark pass over all six approaches.</summary>
        /// <param name="n">Pass number, used only in the header line.</param>
        static void DoIt(int n)
        {
            Console.WriteLine("*** Pass " + n.ToString());

            Run("Old (Chained String.Replace())", ChainedReplace, " ms");
            Run("logicnp (ToCharArray)", s => ReplaceViaCharArray(s, replacements), " ms");
            Run("oleksii (StringBuilder)", DoGeneralReplace, " ms");
            Run("André Christoffer Andersen (Lambda w/ Aggregate)", s => Replace(s, replacements), " ms");
            Run("Richard (Regex w/ MatchEvaluator)", s => richardRegex.Replace(s, richardEvaluator), " ms");
            Run("Marc Gravell (Static Regex)", Sanitize, " ms\n");
        }
    }
}


I am importing some number of records with multiple string fields from an old db to a new db. It seems to be very slow and I suspect it's because I do this:

foreach (var oldObj in oldDB)
{
    // Every string field goes through the same pipeline: Trim(), then one chained
    // String.Replace per character, mapping nine ASCII characters to accented letters.
    var newObj = new NewObject
    {
        Name = oldObj.Name.Trim()
            .Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
            .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
            .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ'),
        Surname = oldObj.Surname.Trim()
            .Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
            .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
            .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ'),
        Address = oldObj.Address.Trim()
            .Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
            .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
            .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ'),
        Note = oldObj.Note.Trim()
            .Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š')
            .Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć')
            .Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ')
    };
    /*
    ... some processing ...
    */
}

Now, I have read some posts and articles on the Net where I have seen many different opinions about this. Some say it would be better to use a regex with a MatchEvaluator; others say it's best to leave it as is.

While it's possible that it'd be easier for me to just do a benchmark case for myself, I decided to ask a question here in case someone else has been wondering about the same question, or in case someone knows in advance.

So what is the fastest way to do this in C#?

EDIT

I have posted the benchmark here. At first sight it looks like Richard's way might be the fastest. However, neither his way nor Marc's would do anything, because of the wrong Regex pattern. After correcting the pattern from

@"\^@\[\]`\}~\{\\" 

to

@"\^|@|\[|\]|`|\}|~|\{|\\" 

it appears that the old way, with chained .Replace() calls, is the fastest after all.

解决方案

Thanks for your inputs guys. I wrote a quick and dirty benchmark to test your inputs. I have tested parsing 4 strings with 500.000 iterations and have done 4 passes. The result is as follows:

*** Pass 1
Old (Chained String.Replace()) way completed in 814 ms
logicnp (ToCharArray) way completed in 916 ms
oleksii (StringBuilder) way completed in 943 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2551 ms
Richard (Regex w/ MatchEvaluator) way completed in 215 ms
Marc Gravell (Static Regex) way completed in 1008 ms

*** Pass 2
Old (Chained String.Replace()) way completed in 786 ms
logicnp (ToCharArray) way completed in 920 ms
oleksii (StringBuilder) way completed in 905 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2515 ms
Richard (Regex w/ MatchEvaluator) way completed in 217 ms
Marc Gravell (Static Regex) way completed in 1025 ms

*** Pass 3
Old (Chained String.Replace()) way completed in 775 ms
logicnp (ToCharArray) way completed in 903 ms
oleksii (StringBuilder) way completed in 931 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2529 ms
Richard (Regex w/ MatchEvaluator) way completed in 214 ms
Marc Gravell (Static Regex) way completed in 1022 ms

*** Pass 4
Old (Chained String.Replace()) way completed in 799 ms
logicnp (ToCharArray) way completed in 908 ms
oleksii (StringBuilder) way completed in 938 ms
André Christoffer Andersen (Lambda w/ Aggregate) way completed in 2592 ms
Richard (Regex w/ MatchEvaluator) way completed in 225 ms
Marc Gravell (Static Regex) way completed in 1050 ms

The code for this benchmark is below. Please review the code and confirm that @Richard has got the fastest way. Note that I haven't checked if outputs were correct, I assumed they were.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Text.RegularExpressions;

namespace StringReplaceTest
{
    /// <summary>
    /// Micro-benchmark comparing several ways of replacing a fixed set of single
    /// characters in four short strings.
    /// </summary>
    class Program
    {
        /// <summary>Timed loop iterations per approach; every iteration processes all four inputs.</summary>
        const int Iterations = 500000;

        static string test1 = "A^@[BCD";
        static string test2 = "E]FGH\\";
        static string test3 = "ijk`l}m";
        static string test4 = "nopq~{r";

        // Replacement table (char -> string) used by the static-regex approach.
        static readonly Dictionary<char, string> repl =
            new Dictionary<char, string>
            {
                {'^', "Č"}, {'@', "Ž"}, {'[', "Š"}, {']', "Ć"}, {'`', "ž"}, {'}', "ć"}, {'~', "č"}, {'{', "š"}, {'\\', "Đ"}
            };

        // Same mapping as char -> char, used by the ToCharArray and Aggregate approaches.
        static readonly Dictionary<char, char> replacements =
            new Dictionary<char, char>
            {
                {'^', 'Č'}, {'@', 'Ž'}, {'[', 'Š'}, {']', 'Ć'}, {'`', 'ž'}, {'}', 'ć'}, {'~', 'č'}, {'{', 'š'}, {'\\', 'Đ'}
            };

        // Alternation pattern and evaluator for the MatchEvaluator approach; created once,
        // so that constructing the Regex is not part of the timed region.
        static readonly Regex richardRegex = new Regex(@"\^|@|\[|\]|`|\}|~|\{|\\");
        static readonly MatchEvaluator richardEvaluator = MapMatch;

        static readonly Regex replaceRegex;

        static Program() // static initializer
        {
            // Build a single character class that contains every key of repl.
            StringBuilder pattern = new StringBuilder().Append('[');
            foreach (var key in repl.Keys)
            {
                pattern.Append(EscapeForCharacterClass(key));
            }
            pattern.Append(']');
            replaceRegex = new Regex(pattern.ToString(), RegexOptions.Compiled);
        }

        /// <summary>
        /// Escapes one character so that it is a literal inside a regex character class.
        /// </summary>
        /// <param name="c">The character to escape.</param>
        /// <returns>The pattern fragment that matches exactly <paramref name="c"/>.</returns>
        static string EscapeForCharacterClass(char c)
        {
            // Regex.Escape leaves ']' and '-' untouched. Inside [...] an unescaped ']'
            // closes the class early, which turned the rest of the keys into a literal
            // sequence that never matched; '-' would be read as a range operator.
            if (c == ']' || c == '-')
            {
                return "\\" + c;
            }
            return Regex.Escape(c.ToString());
        }

        /// <summary>Replaces every character that is a key of the replacement table, using the precompiled static regex.</summary>
        /// <param name="input">The text to process.</param>
        /// <returns>The text with all mapped characters replaced.</returns>
        public static string Sanitize(string input)
        {
            return replaceRegex.Replace(input, match => repl[match.Value[0]]);
        }

        /// <summary>Applies the nine replacements with chained StringBuilder.Replace calls.</summary>
        static string DoGeneralReplace(string input)
        {
            var sb = new StringBuilder(input);
            return sb.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ').ToString();
        }

        /// <summary>Applies the nine replacements with chained String.Replace calls (the "old way").</summary>
        static string ChainedReplace(string input)
        {
            return input.Replace('^', 'Č').Replace('@', 'Ž').Replace('[', 'Š').Replace(']', 'Ć').Replace('`', 'ž').Replace('}', 'ć').Replace('~', 'č').Replace('{', 'š').Replace('\\', 'Đ');
        }

        //Method for replacing chars with a mapping
        static string Replace(string input, IDictionary<char, char> replacementMap)
        {
            return replacementMap.Keys
                .Aggregate(input, (current, oldChar)
                    => current.Replace(oldChar, replacementMap[oldChar]));
        }

        /// <summary>Copies the input to a char array and replaces mapped characters in place.</summary>
        static string ReplaceViaCharArray(string input, Dictionary<char, char> replacementMap)
        {
            char[] chars = input.ToCharArray();
            for (int i = 0; i < chars.Length; i++)
            {
                char newChar;
                if (replacementMap.TryGetValue(chars[i], out newChar))
                {
                    chars[i] = newChar;
                }
            }
            return new string(chars);
        }

        /// <summary>Match evaluator for the alternation regex: maps the matched character to its replacement.</summary>
        static string MapMatch(Match match)
        {
            switch (match.Value)
            {
                case "^": return "Č";
                case "@": return "Ž";
                case "[": return "Š";
                case "]": return "Ć";
                case "`": return "ž";
                case "}": return "ć";
                case "~": return "č";
                case "{": return "š";
                case "\\": return "Đ";
                default: throw new InvalidOperationException("Unexpected match!");
            }
        }

        /// <summary>Runs <paramref name="transform"/> over the four test strings <see cref="Iterations"/> times.</summary>
        /// <returns>Elapsed wall-clock time in milliseconds.</returns>
        static long Time(Func<string, string> transform)
        {
            Stopwatch sw = Stopwatch.StartNew();
            for (int idx = 0; idx < Iterations; idx++)
            {
                transform(test1);
                transform(test2);
                transform(test3);
                transform(test4);
            }
            sw.Stop();
            return sw.ElapsedMilliseconds;
        }

        /// <summary>Checks one approach against the chained-Replace result, then times it and prints the result line.</summary>
        /// <param name="label">Name printed in front of " way completed in".</param>
        /// <param name="transform">The replacement approach under test.</param>
        /// <param name="suffix">Text printed after the millisecond count.</param>
        static void Run(string label, Func<string, string> transform, string suffix)
        {
            // Sanity check outside the timed region: an approach that silently does
            // nothing would otherwise look fast. Prints only when results disagree.
            foreach (string input in new[] { test1, test2, test3, test4 })
            {
                if (transform(input) != ChainedReplace(input))
                {
                    Console.WriteLine("WARNING: " + label + " produced an unexpected result for \"" + input + "\"");
                }
            }
            Console.WriteLine(label + " way completed in " + Time(transform).ToString() + suffix);
        }

        static void Main(string[] args)
        {
            for (int i = 1; i < 5; i++)
            {
                DoIt(i);
            }
        }

        /// <summary>Executes one benchmark pass over all six approaches.</summary>
        /// <param name="n">Pass number, used only in the header line.</param>
        static void DoIt(int n)
        {
            Console.WriteLine("*** Pass " + n.ToString());

            Run("Old (Chained String.Replace())", ChainedReplace, " ms");
            Run("logicnp (ToCharArray)", s => ReplaceViaCharArray(s, replacements), " ms");
            Run("oleksii (StringBuilder)", DoGeneralReplace, " ms");
            Run("André Christoffer Andersen (Lambda w/ Aggregate)", s => Replace(s, replacements), " ms");
            Run("Richard (Regex w/ MatchEvaluator)", s => richardRegex.Replace(s, richardEvaluator), " ms");
            Run("Marc Gravell (Static Regex)", Sanitize, " ms\n");
        }
    }
}

这篇关于在一个字符串,以最快的方式更换多个字符?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆