在 C++ 中,我应该费心缓存变量,还是让编译器去做优化?(别名) [英] In C++, should I bother to cache variables, or let the compiler do the optimization? (Aliasing)
问题描述
请考虑以下代码(`p` 的类型为 `unsigned char*`,`bitmap->width` 是某种整数类型,具体是哪种类型未知,取决于我们使用的某个外部库的版本):
for(unsigned x = 0; x< static_cast< unsigned>(bitmap-> width); ++ x)
{
* p ++ = 0xAA;
* p ++ = 0xBB;
* p ++ = 0xCC;
}
值得对它进行优化吗 [..]
是否存在这样的情况,改成下面的写法能得到更高效的结果:
unsigned width(static_cast<unsigned>(bitmap->width));
for(unsigned x = 0; x< width; ++ x)
{
* p ++ = 0xAA;
* p ++ = 0xBB;
* p ++ = 0xCC;
}
……还是说这对编译器来说是很容易完成的优化?
您认为什么才是“更好”的代码?
编者注(Ike):对于那些对删除线文字感到疑惑的读者:原问题的措辞非常接近偏离主题的范畴,尽管获得了正面反馈,但仍险些被关闭。这些内容已被划掉。不过,请不要因此惩罚那些回答了被划掉部分的答主。
乍一看,我以为在启用优化标志的情况下,编译器可以为两个版本生成等效的汇编代码。但当我实际检查时,结果令我惊讶:
源 unoptimized.cpp
注意:此代码并不意味着执行。
// Global bitmap descriptor; `width` supplies the loop bound read by main().
struct bitmap_t
{
long long width;
} bitmap;
// "Unoptimized" variant: the cast of bitmap.width is written directly in the
// loop condition rather than being cached in a local first.
int main(int argc,char ** argv)
{
for(unsigned x = 0; x< static_cast< unsigned>(bitmap.width); ++ x)
{
// Store through a char*. NOTE(review): presumably this store is why the
// compiler keeps re-reading bitmap inside the loop (a char store may alias
// bitmap.width) -- confirm against the unoptimized.s listing.
argv [x] [0] ='\0';
}
return 0;
}
源 optimized.cpp
注意:此代码并非用于执行。
struct bitmap_t
{
long long width;
} bitmap;
// "Optimized" variant: bitmap.width is converted once and cached in a const
// local before the loop, so the loop condition only compares against that local.
int main(int argc,char ** argv)
{
// Read and convert the global width a single time, before any stores happen.
const unsigned width = static_cast< unsigned>(bitmap.width);
for(unsigned x = 0; x< width; ++ x)
{
// Store through a char*; it cannot change the local `width`.
argv [x] [0] ='\0';
}
return 0;
}
编译
$ g++ -s -O3 unoptimized.cpp
$ g++ -s -O3 optimized.cpp
汇编(unoptimized.s)
.file "unoptimized.cpp"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
mov %eax, %edx
addl $1, %eax
movq (%rsi,%rdx,8), %rdx
movb $0, (%rdx)
cmpl bitmap(%rip), %eax
jb .L3
.L2:
xorl %eax, %eax
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl bitmap
.bss
.align 8
.type bitmap, @object
.size bitmap, 8
bitmap:
.zero 8
.ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
.section .note.GNU-stack,"",@progbits
汇编(optimized.s)
.file "optimized.cpp"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
subl $1, %eax
leaq 8(,%rax,8), %rcx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
movq (%rsi,%rax), %rdx
addq $8, %rax
cmpq %rcx, %rax
movb $0, (%rdx)
jne .L3
.L2:
xorl %eax, %eax
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl bitmap
.bss
.align 8
.type bitmap, @object
.size bitmap, 8
bitmap:
.zero 8
.ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
.section .note.GNU-stack,"",@progbits
diff
$ diff -uN unoptimized.s optimized.s
--- unoptimized.s 2015-11-24 16:11:55.837922223 +0000
+++ optimized.s 2015-11-24 16:12:02.628922941 +0000
@@ -1,4 +1,4 @@
- .file "unoptimized.cpp"
+ .file "optimized.cpp"
.text
.p2align 4,,15
.globl main
@@ -10,16 +10,17 @@
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
+ subl $1, %eax
+ leaq 8(,%rax,8), %rcx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
- mov %eax, %edx
- addl $1, %eax
- movq (%rsi,%rdx,8), %rdx
+ movq (%rsi,%rax), %rdx
+ addq $8, %rax
+ cmpq %rcx, %rax
movb $0, (%rdx)
- cmpl bitmap(%rip), %eax
- jb .L3
+ jne .L3
.L2:
xorl %eax, %eax
ret
优化版本生成的汇编确实会加载(`lea`)`width` 常量,而未优化版本则在每次迭代时计算 `width` 偏移量(`movq`,参见 http://stackoverflow.com/questions/3852909/movq-assembly-function)。
等我有时间时,我会发布一些相关的基准测试。好问题。
Consider the following code (`p` is of type `unsigned char*` and `bitmap->width` is of some integer type; exactly which is unknown and depends on which version of some external library we're using):
for (unsigned x = 0; x < static_cast<unsigned>(bitmap->width); ++x)
{
*p++ = 0xAA;
*p++ = 0xBB;
*p++ = 0xCC;
}
Is it worth optimizing it [..]
Could there be a case where this could yield more efficient results by writing:
unsigned width(static_cast<unsigned>(bitmap->width));
for (unsigned x = 0; x < width; ++x)
{
*p++ = 0xAA;
*p++ = 0xBB;
*p++ = 0xCC;
}
... or is this trivial for the compiler to optimize?
What would you consider to be "better" code?
Note from editor (Ike): for those wondering about the strikeout text, the original question, as phrased, was dangerously close to off-topic territory and was very close to being closed in spite of positive feedback. These have been stricken out. Yet please do not punish the answerers who addressed these stricken sections of the question.
At first glance, I thought the compiler could generate equivalent assembly for both versions with optimization flags activated. When I checked it, I was surprised to see the result:
Source unoptimized.cpp
note: this code is not meant to be executed.
// Global bitmap descriptor; `width` supplies the loop bound read by main().
struct bitmap_t
{
long long width;
} bitmap;
// "Unoptimized" variant: the cast of bitmap.width is written directly in the
// loop condition rather than being cached in a local first.
int main(int argc, char** argv)
{
for (unsigned x = 0 ; x < static_cast<unsigned>(bitmap.width) ; ++x)
{
// Store through a char*. NOTE(review): presumably this store is why the
// compiler keeps re-reading bitmap inside the loop (a char store may alias
// bitmap.width) -- confirm against the `cmpl bitmap(%rip), %eax` in the
// unoptimized.s listing.
argv[x][0] = '\0';
}
return 0;
}
Source optimized.cpp
note: this code is not meant to be executed.
// Global bitmap descriptor; `width` supplies the loop bound read by main().
struct bitmap_t
{
long long width;
} bitmap;
// "Optimized" variant: bitmap.width is converted once and cached in a const
// local before the loop, so the loop condition only compares against that local.
int main(int argc, char** argv)
{
// Read and convert the global width a single time, before any stores happen.
const unsigned width = static_cast<unsigned>(bitmap.width);
for (unsigned x = 0 ; x < width ; ++x)
{
// Store through a char*; it cannot change the local `width`.
argv[x][0] = '\0';
}
return 0;
}
Compilation
$ g++ -s -O3 unoptimized.cpp
$ g++ -s -O3 optimized.cpp
Assembly (unoptimized.s)
.file "unoptimized.cpp"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
mov %eax, %edx
addl $1, %eax
movq (%rsi,%rdx,8), %rdx
movb $0, (%rdx)
cmpl bitmap(%rip), %eax
jb .L3
.L2:
xorl %eax, %eax
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl bitmap
.bss
.align 8
.type bitmap, @object
.size bitmap, 8
bitmap:
.zero 8
.ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
.section .note.GNU-stack,"",@progbits
Assembly (optimized.s)
.file "optimized.cpp"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
subl $1, %eax
leaq 8(,%rax,8), %rcx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
movq (%rsi,%rax), %rdx
addq $8, %rax
cmpq %rcx, %rax
movb $0, (%rdx)
jne .L3
.L2:
xorl %eax, %eax
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl bitmap
.bss
.align 8
.type bitmap, @object
.size bitmap, 8
bitmap:
.zero 8
.ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-16)"
.section .note.GNU-stack,"",@progbits
diff
$ diff -uN unoptimized.s optimized.s
--- unoptimized.s 2015-11-24 16:11:55.837922223 +0000
+++ optimized.s 2015-11-24 16:12:02.628922941 +0000
@@ -1,4 +1,4 @@
- .file "unoptimized.cpp"
+ .file "optimized.cpp"
.text
.p2align 4,,15
.globl main
@@ -10,16 +10,17 @@
movl bitmap(%rip), %eax
testl %eax, %eax
je .L2
+ subl $1, %eax
+ leaq 8(,%rax,8), %rcx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
- mov %eax, %edx
- addl $1, %eax
- movq (%rsi,%rdx,8), %rdx
+ movq (%rsi,%rax), %rdx
+ addq $8, %rax
+ cmpq %rcx, %rax
movb $0, (%rdx)
- cmpl bitmap(%rip), %eax
- jb .L3
+ jne .L3
.L2:
xorl %eax, %eax
ret
The generated assembly for the optimized version actually loads (`lea`) the `width` constant, unlike the unoptimized version, which computes the `width` offset at each iteration (`movq`).
When I get time, I will eventually post some benchmarks on that. Good question.
这篇关于在C ++中,我应该麻烦缓存变量,还是让编译器做优化? (混叠)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!