为什么在这种情况下 .NET 比 C++ 更快? [英] Why is .NET faster than C++ in this case?

查看:158
本文介绍了为什么.NET在这种情况下,速度比C ++?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

请确保在 IDE 之外运行程序。这一点很关键。

-edit- 我很喜欢 SLaks 的评论:"这些回答中的错误信息之多令人震惊。" :D

冷静点,各位。你们几乎全都说错了。我确实做了优化。事实证明,我做的那些优化还不够好。我在 GCC 下用 gettimeofday 运行了这段代码(代码贴在下面),使用 g++ -O2 file.cpp 编译,得到了比 C# 略快的结果。也许 MS 没有针对这种特定情况做必要的优化,但在下载并安装 MinGW 之后,我进行了测试,发现速度几乎相同。<a href="http://stackoverflow.com/questions/2285864/why-is-net-faster-than-c-in-this-case/2286084#2286084">Justicle</a> 似乎是对的。我可以发誓,我曾在自己的电脑上用 clock 计时,发现它更慢,不过问题已经解决了。使用 MS 编译器时,C++ 的速度并不会慢上将近一倍。

当我的朋友告诉我这件事时,我简直不敢相信。于是我拿了他的代码,并在里面加了一些计时器。

我没有用 Boo,而是用了 C#。C# 的结果始终更快。为什么?无论我用什么数字,.NET 版本的耗时都几乎只有一半。

C ++版本(坏的版本):

#include <iostream>
#include <stdio.h>
#include <intrin.h>
#include <windows.h>
using namespace std;

int fib(int n)
{
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}

int main()
{
    __int64 time = 0xFFFFFFFF;
    while (1)
    {
        int n;
        //cin >> n;
        n = 41;
        if (n < 0) break;
__int64 start = __rdtsc();
        int res = fib(n);
__int64 end = __rdtsc();
        cout << res << endl;
        cout << (float)(end-start)/1000000<<endl;
        break;
    }

    return 0;
}
 

C ++版本(更好的版本):

#include <iostream>
#include <stdio.h>
#include <intrin.h>
#include <windows.h>
using namespace std;

int fib(int n)
{
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}

int main()
{
    __int64 time = 0xFFFFFFFF;
    while (1)
    {
        int n;
        //cin >> n;
        n = 41;
        if (n < 0) break;
        LARGE_INTEGER start, end, delta, freq;
        ::QueryPerformanceFrequency( &freq );
        ::QueryPerformanceCounter( &start );
        int res = fib(n);
        ::QueryPerformanceCounter( &end );
        delta.QuadPart = end.QuadPart - start.QuadPart;
        cout << res << endl;
        cout << ( delta.QuadPart * 1000 ) / freq.QuadPart <<endl;
break;
    }

    return 0;
}
 

C#版本:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using System.Runtime.InteropServices;
using System.ComponentModel;
using System.Threading;
using System.IO;

using System.Diagnostics;

namespace fibCSTest
{
    class Program
    {
         static int fib(int n)
         {
            if (n < 2)return n;
            return fib(n - 1) + fib(n - 2);
         }

         static void Main(string[] args)
         {
             //var sw = new Stopwatch();
             //var timer = new PAB.HiPerfTimer();
             var timer = new Stopwatch();
             while (true)
             {
                 int n;
                 //cin >> n;
                 n = 41;
                 if (n < 0) break;
                 timer.Start();
                 int res = fib(n);
                 timer.Stop();
                 Console.WriteLine(res);
                 Console.WriteLine(timer.ElapsedMilliseconds);
                 break;
             }
         }
    }
}
 

GCC版本:

#include <iostream>
#include <stdio.h>
#include <sys/time.h>
using namespace std;

int fib(int n)
{
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}

int main()
{
    timeval start, end;
    while (1)
    {
        int n;
        //cin >> n;
        n = 41;
        if (n < 0) break;
        gettimeofday(&start, 0);
        int res = fib(n);
        gettimeofday(&end, 0);
        int sec = end.tv_sec - start.tv_sec;
        int usec = end.tv_usec - start.tv_usec;
        cout << res << endl;
        cout << sec << " " << usec <<endl;
        break;
    }

    return 0;
}
 

解决方案

编辑:TL/DR 版本:CLR JIT 会内联一层递归,而 MSVC 8 SP1 在没有 #pragma inline_recursion(on) 的情况下不会这样做。另外,你应该在调试器之外运行 C# 版本,才能得到完全优化的 JIT 代码。

我用 VS 2008 SP1,在一台运行 Vista、接通电源并使用"高性能"电源设置的 Core2 Duo 笔记本上,得到了与 acidzombie24 类似的 C# 对比 C++ 的结果(约 1600 毫秒对约 3800 毫秒)。要看到经过优化的 JIT 编译后的 C# 代码有点麻烦,但在 x86 上它大致如下:

  00000000 55推EBP
00000001 8B EC MOV EBP,ESP
00000003 57推EDI
00000004 56推ESI
00000005 53推EBX
00000006 8B F1 MOV ESI,ECX
00000008 83 FE 02 CMP ESI,2
0000000b 7D 07 JGE 00000014
0000000d 8B C6 MOV EAX,ESI
0000000F 5B流行EBX
00000010 5E流行ESI
00000011 5F流行EDI
00000012 5D弹出EBP
00000013 C3 RET
            返回FIB(N  -  1)+ FIB(N  -  2);
00000014 8D 7E FF LEA EDI,[ESI-1]
00000017 83 FF 02 CMP EDI,2
0000001a 7D 04 JGE 00000020
0000001c 8B DF MOV EBX,EDI
0000001E EB 19 JMP 00000039
00000020 8D 4F FF LEA ECX,[EDI-1]
00000023 FF 15 F8 2F 12 00呼叫DWORD PTR DS:[00122FF8h]
00000029 8B D8 MOV EBX,EAX
0000002b 4F月EDI
0000002c 4F月EDI
0000002D 8B CF MOV ECX,EDI
0000002f FF 15 F8 2F 12 00呼叫DWORD PTR DS:[00122FF8h]
00000035 03 C3加EAX,EBX
00000037 8B D8 MOV EBX,EAX
00000039 4E月ESI
0000003a 4E月ESI
0000003b 83 FE 02 CMP ESI,2
0000003e 7D 04 JGE 00000044
00000040 8B D6 MOV EDX,ESI
00000042 EB 19 JMP 0000005D
00000044 8D 4E FF LEA ECX,[ESI-1]
00000047 FF 15 F8 2F 12 00呼叫DWORD PTR DS:[00122FF8h]
0000004d 8B F8 MOV EDI,EAX
0000004f 4E月ESI
00000050 4E月ESI
00000051 8B CE MOV ECX,ESI
00000053 FF 15 F8 2F 12 00呼叫DWORD PTR DS:[00122FF8h]
00000059 03 C7加EAX,EDI
0000005b 8B D0 MOV EDX,EAX
0000005d 03 DA增加EBX,EDX
0000005f 8B C3 MOV EAX,EBX
00000061 5B流行EBX
00000062 5E流行ESI
00000063 5F流行EDI
00000064 5D弹出EBP
00000065 C3 RET
 

相比之下,C++ 生成的代码(/Ox /Ob2 /Oi /Ot /Oy /GL /Gr)如下:

  INT FIB(INT N)
{
00B31000 56推ESI
00B31001 8B F1 MOV ESI,ECX
    如果(N 2)返回N;
00B31003 83 FE 02 CMP ESI,2
00B31006 7D 04 JGE FIB + 0CH(0B3100Ch)
00B31008 8B C6 MOV EAX,ESI
00B3100A 5E流行ESI
00B3100B C3 RET
00B3100C 57推EDI
    返回FIB(N  -  1)+ FIB(N  -  2);
00B3100D 8D 4E FE LEA ECX,[ESI-2]
00B31010 E8 EB FF FF FF调用FIB(0B31000h)
00B31015 8D 4E FF LEA ECX,[ESI-1]
00B31018 8B F8 MOV EDI,EAX
00B3101A E8 E1 FF FF FF调用FIB(0B31000h)
00B3101F 03 C7加EAX,EDI
00B31021 5F流行EDI
00B31022 5E流行ESI
}
00B31023 C3 RET
 

C# 版本基本上内联了 fib(n-1) 和 fib(n-2)。对于这种调用密集的函数,减少函数调用次数是提速的关键。把 fib 替换为以下内容:

int fib(int n);

int fib2(int n)
{
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}

int fib(int n)
{
    if (n < 2) return n;
    return fib2(n - 1) + fib2(n - 2);
}
 

这样可以把时间降到约 1900 毫秒。顺便说一句,如果我使用 #pragma inline_recursion(on),用原来的 fib 也能得到类似的结果。再多展开一层:

int fib(int n);

int fib3(int n)
{
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}

int fib2(int n)
{
    if (n < 2) return n;
    return fib3(n - 1) + fib3(n - 2);
}

int fib(int n)
{
    if (n < 2) return n;
    return fib2(n - 1) + fib2(n - 2);
}
 

这样可以把时间降到约 1380 毫秒。再往后,收益就逐渐减小了。

所以看起来,我机器上的 CLR JIT 会把递归调用内联一层,而 C++ 编译器默认不会这样做。

要是所有性能关键的代码都像 fib 一样就好了!

Make sure you run outside of the IDE. That is key.

-edit- I LOVE SLaks comment. "The amount of misinformation in these answers is staggering." :D

Calm down guys. Pretty much all of you were wrong. I DID make optimizations. It turns out whatever optimizations I made weren't good enough. I ran the code in GCC using gettimeofday (I'll paste code below) and used g++ -O2 file.cpp and got slightly faster results than C#. Maybe MS didn't create the optimizations needed in this specific case, but after downloading and installing MinGW I tested and found the speed to be nearly identical. Justicle seems to be right. I could have sworn I used clock on my PC to count and found it was slower, but problem solved. C++ isn't almost twice as slow with the MS compiler.

When my friend informed me of this I couldn't believe it. So I took his code and put some timers onto it.

Instead of Boo I used C#. I constantly got faster results in C#. Why? The .NET version was nearly half the time no matter what number I used.

C++ version (bad version):

#include <iostream>
#include <stdio.h>
#include <intrin.h>
#include <windows.h>
using namespace std;

// Naive doubly-recursive Fibonacci used as the benchmark workload.
// Any n below 2 (including negative values) is returned unchanged; otherwise
// the result is the sum of the two preceding terms, each computed recursively.
int fib(int n)
{
    return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}

// Benchmark driver for the "bad" timing approach: runs fib(41) once, prints
// the result, then prints the __rdtsc() delta divided by 1,000,000.
// NOTE(review): __rdtsc() presumably returns CPU time-stamp-counter ticks, so
// the second printed value would be millions of ticks rather than
// milliseconds — confirm against the MSVC intrinsic documentation.
int main()
{
    // Never read anywhere in this function.
    __int64 time = 0xFFFFFFFF;
    // Only one pass is ever made: the unconditional break at the bottom exits.
    while (1)
    {
        int n;
        // Interactive input is disabled; n is hard-coded on the next line.
        //cin >> n;
        n = 41;
        // Exit path for a negative n; with n fixed at 41 it never fires.
        if (n < 0) break;
// Sample the counter immediately before and after the call being measured.
__int64 start = __rdtsc();
        int res = fib(n);
__int64 end = __rdtsc();
        cout << res << endl;
        // Counter delta scaled down by one million and printed as a float.
        cout << (float)(end-start)/1000000<<endl;
        break;
    }

    return 0;
}

C++ version (better version):

#include <iostream>
#include <stdio.h>
#include <intrin.h>
#include <windows.h>
using namespace std;

// Recursive Fibonacci workload for the benchmark.
// Values of n below 2 fall through and are returned as-is; larger values
// recurse on n - 1 and n - 2 and add the two results.
int fib(int n)
{
    if (n >= 2)
        return fib(n - 1) + fib(n - 2);
    return n;
}

// Benchmark driver: times a single fib(41) call with the Win32 performance
// counter and prints the result followed by the elapsed time in whole
// milliseconds.
//
// Cleanup relative to the earlier form: the never-read `time` local is gone,
// and the one-pass `while (1) { ... break; }` scaffold (whose `n < 0` exit
// could not fire with n fixed at 41) is flattened into straight-line code.
// The printed output is unchanged.
int main()
{
    const int n = 41;

    LARGE_INTEGER freq, start, end;
    ::QueryPerformanceFrequency(&freq);

    // Sample the counter immediately around the measured call only.
    ::QueryPerformanceCounter(&start);
    const int res = fib(n);
    ::QueryPerformanceCounter(&end);

    const LONGLONG elapsedTicks = end.QuadPart - start.QuadPart;

    cout << res << endl;
    // ticks * 1000 / (ticks per second) -> milliseconds (integer division).
    cout << (elapsedTicks * 1000) / freq.QuadPart << endl;

    return 0;
}

C# version:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using System.Runtime.InteropServices;
using System.ComponentModel;
using System.Threading;
using System.IO;

using System.Diagnostics;

namespace fibCSTest
{
    /// <summary>
    /// Console benchmark that times one call to a naive recursive Fibonacci.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Returns <paramref name="n"/> unchanged when it is below 2; otherwise
        /// the sum of fib(n - 1) and fib(n - 2), computed recursively.
        /// </summary>
        static int fib(int n)
        {
            return n < 2 ? n : fib(n - 1) + fib(n - 2);
        }

        /// <summary>
        /// Runs fib(41) once under a Stopwatch, then writes the result and the
        /// elapsed milliseconds to the console, one per line.
        /// </summary>
        static void Main(string[] args)
        {
            var stopwatch = new Stopwatch();

            // Single-pass block; the early break mirrors the negative-input exit.
            do
            {
                int n = 41;
                if (n < 0) break;

                // Start/stop tightly around the measured call only.
                stopwatch.Start();
                int result = fib(n);
                stopwatch.Stop();

                Console.WriteLine(result);
                Console.WriteLine(stopwatch.ElapsedMilliseconds);
            } while (false);
        }
    }
}

GCC version:

#include <iostream>
#include <stdio.h>
#include <sys/time.h>
using namespace std;

// Plain recursive Fibonacci; this is the workload being timed.
// n below 2 (negative values included) is handed back untouched.
int fib(int n)
{
    if (n < 2)
    {
        return n;
    }

    const int oneBack = n - 1;
    const int twoBack = n - 2;
    return fib(oneBack) + fib(twoBack);
}

// Benchmark driver: times a single fib(41) call with gettimeofday() and prints
// the result, then the elapsed time as "<seconds> <microseconds>".
//
// Fix: the seconds and microseconds fields used to be subtracted
// independently, so whenever the end sample's tv_usec was smaller than the
// start sample's, a negative microsecond value was printed. The difference is
// now normalized by borrowing one second, keeping 0 <= usec < 1000000.
// The one-pass `while (1) { ... break; }` scaffold, whose `n < 0` exit could
// not fire with n fixed at 41, is flattened into straight-line code.
int main()
{
    const int n = 41;

    timeval start, end;

    // Sample the clock immediately around the measured call only.
    gettimeofday(&start, 0);
    const int res = fib(n);
    gettimeofday(&end, 0);

    long sec = end.tv_sec - start.tv_sec;
    long usec = end.tv_usec - start.tv_usec;
    if (usec < 0)
    {
        // Borrow one second so the microsecond part is non-negative.
        sec -= 1;
        usec += 1000000;
    }

    cout << res << endl;
    cout << sec << " " << usec << endl;

    return 0;
}

解决方案

EDIT: TL/DR version: CLR JIT will inline one level of recursion, MSVC 8 SP1 will not without #pragma inline_recursion(on). And you should run the C# version outside of a debugger to get the fully optimized JIT.

I got similar results to acidzombie24 with C# vs. C++ using VS 2008 SP1 on a Core2 Duo laptop running Vista plugged in with "high performance" power settings (~1600 ms vs. ~3800 ms). It's kind of tricky to see the optimized JIT'd C# code, but for x86 it boils down to this:

00000000 55               push        ebp  
00000001 8B EC            mov         ebp,esp 
00000003 57               push        edi  
00000004 56               push        esi  
00000005 53               push        ebx  
00000006 8B F1            mov         esi,ecx 
00000008 83 FE 02         cmp         esi,2 
0000000b 7D 07            jge         00000014 
0000000d 8B C6            mov         eax,esi 
0000000f 5B               pop         ebx  
00000010 5E               pop         esi  
00000011 5F               pop         edi  
00000012 5D               pop         ebp  
00000013 C3               ret              
            return fib(n - 1) + fib(n - 2);
00000014 8D 7E FF         lea         edi,[esi-1] 
00000017 83 FF 02         cmp         edi,2 
0000001a 7D 04            jge         00000020 
0000001c 8B DF            mov         ebx,edi 
0000001e EB 19            jmp         00000039 
00000020 8D 4F FF         lea         ecx,[edi-1] 
00000023 FF 15 F8 2F 12 00 call        dword ptr ds:[00122FF8h] 
00000029 8B D8            mov         ebx,eax 
0000002b 4F               dec         edi  
0000002c 4F               dec         edi  
0000002d 8B CF            mov         ecx,edi 
0000002f FF 15 F8 2F 12 00 call        dword ptr ds:[00122FF8h] 
00000035 03 C3            add         eax,ebx 
00000037 8B D8            mov         ebx,eax 
00000039 4E               dec         esi  
0000003a 4E               dec         esi  
0000003b 83 FE 02         cmp         esi,2 
0000003e 7D 04            jge         00000044 
00000040 8B D6            mov         edx,esi 
00000042 EB 19            jmp         0000005D 
00000044 8D 4E FF         lea         ecx,[esi-1] 
00000047 FF 15 F8 2F 12 00 call        dword ptr ds:[00122FF8h] 
0000004d 8B F8            mov         edi,eax 
0000004f 4E               dec         esi  
00000050 4E               dec         esi  
00000051 8B CE            mov         ecx,esi 
00000053 FF 15 F8 2F 12 00 call        dword ptr ds:[00122FF8h] 
00000059 03 C7            add         eax,edi 
0000005b 8B D0            mov         edx,eax 
0000005d 03 DA            add         ebx,edx 
0000005f 8B C3            mov         eax,ebx 
00000061 5B               pop         ebx  
00000062 5E               pop         esi  
00000063 5F               pop         edi  
00000064 5D               pop         ebp  
00000065 C3               ret  

In contrast to the C++ generated code (/Ox /Ob2 /Oi /Ot /Oy /GL /Gr):

int fib(int n)
{ 
00B31000 56               push        esi  
00B31001 8B F1            mov         esi,ecx 
    if (n < 2) return n; 
00B31003 83 FE 02         cmp         esi,2 
00B31006 7D 04            jge         fib+0Ch (0B3100Ch) 
00B31008 8B C6            mov         eax,esi 
00B3100A 5E               pop         esi  
00B3100B C3               ret              
00B3100C 57               push        edi  
    return fib(n - 1) + fib(n - 2); 
00B3100D 8D 4E FE         lea         ecx,[esi-2] 
00B31010 E8 EB FF FF FF   call        fib (0B31000h) 
00B31015 8D 4E FF         lea         ecx,[esi-1] 
00B31018 8B F8            mov         edi,eax 
00B3101A E8 E1 FF FF FF   call        fib (0B31000h) 
00B3101F 03 C7            add         eax,edi 
00B31021 5F               pop         edi  
00B31022 5E               pop         esi  
} 
00B31023 C3               ret              

The C# version basically inlines fib(n-1) and fib(n-2). For a function that is so call heavy, reducing the number of function calls is the key to speed. Replacing fib with the following:

int fib(int n);

// Second link of a manually unrolled recursion: same body as fib, but it
// calls back into fib, so fib and fib2 alternate on the way down.
int fib2(int n)
{
    return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}

// Entry point. Yields the same values as the plain recursive Fibonacci
// (n below 2 is returned unchanged); recursion is routed through fib2.
int fib(int n)
{
    return (n < 2) ? n : fib2(n - 1) + fib2(n - 2);
}

Gets it down to ~1900 ms. Incidentally, if I use #pragma inline_recursion(on) I get similar results with the original fib. Unrolling it one more level:

int fib(int n);

// Third link of the manually unrolled chain; recurses back into fib,
// closing the fib -> fib2 -> fib3 -> fib cycle.
int fib3(int n)
{
    if (n >= 2)
        return fib(n - 1) + fib(n - 2);
    return n;
}

// Second link of the chain; recurses into fib3.
int fib2(int n)
{
    if (n >= 2)
        return fib3(n - 1) + fib3(n - 2);
    return n;
}

// Entry point. Yields the same values as the plain recursive Fibonacci
// (n below 2 is returned unchanged); recursion is routed through fib2.
int fib(int n)
{
    if (n >= 2)
        return fib2(n - 1) + fib2(n - 2);
    return n;
}

Gets it down to ~1380 ms. Beyond that it tapers off.

So it appears that the CLR JIT for my machine will inline recursive calls one level, whereas the C++ compiler will not do that by default.

If only all performance critical code were like fib!

这篇关于为什么.NET在这种情况下,速度比C ++?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆