在 C++ 中，我应该费心缓存变量，还是让编译器来做优化？（别名） [英] In C++, should I bother to cache variables, or let the compiler do the optimization? (Aliasing)

查看：104 发布时间：2016/10/22 17:06:51 c++ performance caching optimization strict-aliasing

本文介绍了在 C++ 中，我应该费心缓存变量，还是让编译器来做优化？（别名）的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

请考虑以下代码（p 的类型为 unsigned char*，bitmap->width 是某种整数类型，具体是哪一种并不清楚，取决于我们所使用的某个外部库的版本）：

  for (unsigned x = 0; x < static_cast<unsigned>(bitmap->width); ++x)
  {
      *p++ = 0xAA;
      *p++ = 0xBB;
      *p++ = 0xCC;
  }

_{是否值得对其进行优化 [..]}

是否存在这样的情况：改写成下面的形式能够得到更高效的结果：
unsigned width(static_cast<unsigned>(bitmap->width));
for (unsigned x = 0; x < width; ++x)
{
    *p++ = 0xAA;
    *p++ = 0xBB;
    *p++ = 0xCC;
}
……还是说这对编译器来说是轻而易举就能完成的优化？

_{您认为哪一种才算是“更好”的代码？}

_{来自编辑的注释（Ike）：对于那些对带删除线的文字感到疑惑的人，原来的问题按其措辞已经危险地接近偏离主题的范围，尽管收到了正面反馈，仍差一点被关闭。这些内容已被划掉。但请不要因此惩罚那些针对被划掉部分作答的回答者。}
解决方案
乍一看，我以为在启用优化标志的情况下，编译器会为两个版本生成等价的汇编代码。但当我实际检查时，结果令我惊讶：

源 unoptimized.cpp

注意：此代码并不意味着执行。
struct bitmap_t
{
    long long width;
} bitmap;

int main(int argc, char** argv)
{
    for (unsigned x = 0; x < static_cast<unsigned>(bitmap.width); ++x)
    {
        argv[x][0] = '\0';
    }
    return 0;
}

源 optimized.cpp

注意：此代码并非用于实际执行。

struct bitmap_t
{
    long long width;
} bitmap;

int main(int argc, char** argv)
{
    const unsigned width = static_cast<unsigned>(bitmap.width);
    for (unsigned x = 0; x < width; ++x)
    {
        argv[x][0] = '\0';
    }
    return 0;
}

编译

$ g++ -s -O3 unoptimized.cpp

$ g++ -s -O3 optimized.cpp

汇编（unoptimized.s）

    .file   "unoptimized.cpp"
    .text
    .p2align 4,,15
.globl main
    .type   main, @function
main:
.LFB0:
    .cfi_startproc
    .cfi_personality 0x3,__gxx_personality_v0
    movl    bitmap(%rip), %eax
    testl   %eax, %eax
    je      .L2
    xorl    %eax, %eax
    .p2align 4,,10
    .p2align 3
.L3:
    mov     %eax, %edx
    addl    $1, %eax
    movq    (%rsi,%rdx,8), %rdx
    movb    $0, (%rdx)
    cmpl    bitmap(%rip), %eax
    jb      .L3
.L2:
    xorl    %eax, %eax
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
.globl bitmap
    .bss
    .align 8
    .type   bitmap, @object
    .size   bitmap, 8
bitmap:
    .zero   8
    .ident  "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
    .section    .note.GNU-stack,"",@progbits

汇编（optimized.s）

    .file   "optimized.cpp"
    .text
    .p2align 4,,15
.globl main
    .type   main, @function
main:
.LFB0:
    .cfi_startproc
    .cfi_personality 0x3,__gxx_personality_v0
    movl    bitmap(%rip), %eax
    testl   %eax, %eax
    je      .L2
    subl    $1, %eax
    leaq    8(,%rax,8), %rcx
    xorl    %eax, %eax
    .p2align 4,,10
    .p2align 3
.L3:
    movq    (%rsi,%rax), %rdx
    addq    $8, %rax
    cmpq    %rcx, %rax
    movb    $0, (%rdx)
    jne     .L3
.L2:
    xorl    %eax, %eax
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
.globl bitmap
    .bss
    .align 8
    .type   bitmap, @object
    .size   bitmap, 8
bitmap:
    .zero   8
    .ident  "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
    .section    .note.GNU-stack,"",@progbits

diff

$ diff -uN unoptimized.s optimized.s
--- unoptimized.s   2015-11-24 16:11:55.837922223 +0000
+++ optimized.s     2015-11-24 16:12:02.628922941 +0000
@@ -1,4 +1,4 @@
-    .file   "unoptimized.cpp"
+    .file   "optimized.cpp"
     .text
     .p2align 4,,15
 .globl main
@@ -10,16 +10,17 @@
     movl    bitmap(%rip), %eax
     testl   %eax, %eax
     je      .L2
+    subl    $1, %eax
+    leaq    8(,%rax,8), %rcx
     xorl    %eax, %eax
     .p2align 4,,10
     .p2align 3
 .L3:
-    mov     %eax, %edx
-    addl    $1, %eax
-    movq    (%rsi,%rdx,8), %rdx
+    movq    (%rsi,%rax), %rdx
+    addq    $8, %rax
+    cmpq    %rcx, %rax
     movb    $0, (%rdx)
-    cmpl    bitmap(%rip), %eax
-    jb      .L3
+    jne     .L3
 .L2:
     xorl    %eax, %eax
     ret

与未优化版本在每次迭代时都要计算 width 的偏移量（[movq](http://stackoverflow.com/questions/3852909/movq-assembly-function)）不同，为优化版本生成的汇编代码确实只是加载（lea）了 width 常量。
当我得到时间时，我最终会发布一些基准。好问题。 Consider the following code (p is of type unsigned char* and bitmap->width is of some integer type, exactly which is unknown and depends on which version of some external library we're using): for (unsigned x = 0; x < static_cast<unsigned>(bitmap->width); ++x) { *p++ = 0xAA; *p++ = 0xBB; *p++ = 0xCC; } _{Is it worth optimizing it [..]} Could there be a case where this could yield more efficient results by writing: unsigned width(static_cast<unsigned>(bitmap->width)); for (unsigned x = 0; x < width; ++x) { *p++ = 0xAA; *p++ = 0xBB; *p++ = 0xCC; } ... or is this trivial for the compiler to optimize? _{What would you consider to be "better" code?} _{Note from editor (Ike): for those wondering about the strikeout text, the original question, as phrased, was dangerously close to off-topic territory and was very close to being closed in spite of positive feedback. These have been stricken out. Yet please do not punish the answerers who addressed these stricken sections of the question.} 解决方案 At first glance, I thought the compiler could generate equivalent assembly for both versions with optimization flags activated. When I checked it, I was surprised to see the result: Source unoptimized.cpp note: this code is not meant to be executed. struct bitmap_t { long long width; } bitmap; int main(int argc, char** argv) { for (unsigned x = 0 ; x < static_cast<unsigned>(bitmap.width) ; ++x) { argv[x][0] = '\0'; } return 0; } Source optimized.cpp note: this code is not meant to be executed. 
struct bitmap_t { long long width; } bitmap; int main(int argc, char** argv) { const unsigned width = static_cast<unsigned>(bitmap.width); for (unsigned x = 0 ; x < width ; ++x) { argv[x][0] = '\0'; } return 0; } Compilation $ g++ -s -O3 unoptimized.cpp $ g++ -s -O3 optimized.cpp Assembly (unoptimized.s) .file "unoptimized.cpp" .text .p2align 4,,15 .globl main .type main, @function main: .LFB0: .cfi_startproc .cfi_personality 0x3,__gxx_personality_v0 movl bitmap(%rip), %eax testl %eax, %eax je .L2 xorl %eax, %eax .p2align 4,,10 .p2align 3 .L3: mov %eax, %edx addl $1, %eax movq (%rsi,%rdx,8), %rdx movb $0, (%rdx) cmpl bitmap(%rip), %eax jb .L3 .L2: xorl %eax, %eax ret .cfi_endproc .LFE0: .size main, .-main .globl bitmap .bss .align 8 .type bitmap, @object .size bitmap, 8 bitmap: .zero 8 .ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)" .section .note.GNU-stack,"",@progbits Assembly (optimized.s) .file "optimized.cpp" .text .p2align 4,,15 .globl main .type main, @function main: .LFB0: .cfi_startproc .cfi_personality 0x3,__gxx_personality_v0 movl bitmap(%rip), %eax testl %eax, %eax je .L2 subl $1, %eax leaq 8(,%rax,8), %rcx xorl %eax, %eax .p2align 4,,10 .p2align 3 .L3: movq (%rsi,%rax), %rdx addq $8, %rax cmpq %rcx, %rax movb $0, (%rdx) jne .L3 .L2: xorl %eax, %eax ret .cfi_endproc .LFE0: .size main, .-main .globl bitmap .bss .align 8 .type bitmap, @object .size bitmap, 8 bitmap: .zero 8 .ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)" .section .note.GNU-stack,"",@progbits diff $ diff -uN unoptimized.s optimized.s --- unoptimized.s 2015-11-24 16:11:55.837922223 +0000 +++ optimized.s 2015-11-24 16:12:02.628922941 +0000 @@ -1,4 +1,4 @@ - .file "unoptimized.cpp" + .file "optimized.cpp" .text .p2align 4,,15 .globl main @@ -10,16 +10,17 @@ movl bitmap(%rip), %eax testl %eax, %eax je .L2 + subl $1, %eax + leaq 8(,%rax,8), %rcx xorl %eax, %eax .p2align 4,,10 .p2align 3 .L3: - mov %eax, %edx - addl $1, %eax - movq (%rsi,%rdx,8), %rdx + movq (%rsi,%rax), %rdx + addq 
$8, %rax + cmpq %rcx, %rax movb $0, (%rdx) - cmpl bitmap(%rip), %eax - jb .L3 + jne .L3 .L2: xorl %eax, %eax ret The generated assembly for the optimized version does actually load (lea) the width constant unlike the unoptimized version which computes the width offset at each iteration (movq). When I'll get time, I eventually post some benchmark on that. Good question. 这篇关于在C ++中，我应该麻烦缓存变量，还是让编译器做优化？（混叠）的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

~~查看全文~~

在C ++中，我应该麻烦缓存变量，还是让编译器做优化？（混叠） [英] In C++, should I bother to cache variables, or let the compiler do the optimization? (Aliasing)

问题描述

源 `unoptimized.cpp`

源 `optimized.cpp`

编译

程序集（unoptimized.s）

Assembly（optimized.s）

diff

Source `unoptimized.cpp`

Source `optimized.cpp`

Compilation

Assembly (unoptimized.s)

Assembly (optimized.s)

diff

相关文章

C/C++开发最新文章

热门教程

热门工具

登录关闭

在C ++中，我应该麻烦缓存变量，还是让编译器做优化？ （混叠） [英] In C++, should I bother to cache variables, or let the compiler do the optimization? (Aliasing)

问题描述

源 unoptimized.cpp

源 optimized.cpp

编译

程序集（unoptimized.s）

Assembly（optimized.s）

diff

Source unoptimized.cpp

Source optimized.cpp

Compilation

Assembly (unoptimized.s)

Assembly (optimized.s)

diff

相关文章

C/C++开发最新文章

热门教程

热门工具

登录 关闭

在C ++中，我应该麻烦缓存变量，还是让编译器做优化？（混叠） [英] In C++, should I bother to cache variables, or let the compiler do the optimization? (Aliasing)

源 `unoptimized.cpp`

源 `optimized.cpp`

Source `unoptimized.cpp`

Source `optimized.cpp`

登录关闭