现代 C++ 编译器会内联只被调用一次的函数吗? [英] Do modern C++ compilers inline functions which are called exactly once?

查看:155
本文介绍了“现代 C++ 编译器会内联只被调用一次的函数吗?”这一问题的解答,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

比如说,我的头文件是:

class A
{
    void Complicated();
}

和我的源文件

void A::Complicated()
{
    ...非常长的函数...
}

我可以把源文件拆分成下面这样吗:

void DoInitialStuff(按引用或按值传入所需的变量)
{
    ...
}
void HandleCaseA(按引用或按值传入所需的变量)
{
    ...
}
void HandleCaseB(按引用或按值传入所需的变量)
{
    ...
}
void FinishUp(按引用或按值传入所需的变量)
{
    ...
}
void A::Complicated()
{
    ...
    DoInitialStuff(...);
    switch ...
        HandleCaseA(...)
        HandleCaseB(...)
    ...
    FinishUp(...)
}

这样做纯粹是为了提高可读性,而不必担心对性能造成任何影响吗?

解决方案

您应该将这些函数标记为 static,以便编译器知道它们只属于该翻译单元(translation unit)。



如果不加 static,编译器(除非启用 LTO / WPA)无法假定该函数只被调用一次,因此不太可能将其内联。



下面使用 LLVM Try Out 页面进行演示。



话虽如此,写代码应当首先考虑可读性;微优化(这类调整正是一种微优化)应当只在做过性能测量之后再进行。



示例:

#include <cstdio>

static void foo(int i) {
  int m = i % 3;
  printf("%d %d", i, m);
}

int main(int argc, char* argv[]) {
  for (int i = 0; i != argc; ++i) {
    foo(i);
  }
}

使用 static 时生成:

; ModuleID = '/tmp/webcompile/_27689_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

@.str = private constant [6 x i8] c"%d %d\00"     ; <[6 x i8]*> [#uses=1]

define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
  %cmp4 = icmp eq i32 %argc, 0                    ; <i1> [#uses=1]
  br i1 %cmp4, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
  %rem.i = srem i32 %0, 3                         ; <i32> [#uses=1]
  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
  %inc = add nsw i32 %0, 1                        ; <i32> [#uses=2]
  %exitcond = icmp eq i32 %inc, %argc             ; <i1> [#uses=1]
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}

declare i32 @printf(i8* nocapture, ...) nounwind

不使用 static 时生成:

; ModuleID = '/tmp/webcompile/_27859_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

@.str = private constant [6 x i8] c"%d %d\00"     ; <[6 x i8]*> [#uses=1]

define void @foo(int)(i32 %i) nounwind {
entry:
  %rem = srem i32 %i, 3                           ; <i32> [#uses=1]
  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %i, i32 %rem) ; <i32> [#uses=0]
  ret void
}

declare i32 @printf(i8* nocapture, ...) nounwind

define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
  %cmp4 = icmp eq i32 %argc, 0                    ; <i1> [#uses=1]
  br i1 %cmp4, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
  %rem.i = srem i32 %0, 3                         ; <i32> [#uses=1]
  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
  %inc = add nsw i32 %0, 1                        ; <i32> [#uses=2]
  %exitcond = icmp eq i32 %inc, %argc             ; <i1> [#uses=1]
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}


As in, say my header file is:

class A
{
    void Complicated();
}

And my source file

void A::Complicated()
{
    ...really long function...
}

Can I split the source file into

void DoInitialStuff(pass necessary vars by ref or value)
{
    ...
}
void HandleCaseA(pass necessary vars by ref or value)
{
    ...
}
void HandleCaseB(pass necessary vars by ref or value)
{
    ...
}
void FinishUp(pass necessary vars by ref or value)
{
    ...
}
void A::Complicated()
{
    ...
    DoInitialStuff(...);
    switch ...
        HandleCaseA(...)
        HandleCaseB(...)
    ...
    FinishUp(...)
}

Entirely for readability and without any fear of impact in terms of performance?

解决方案

You should mark the functions static so that the compiler knows they are local to that translation unit.

Without static the compiler cannot assume (barring LTO / WPA) that the function is only called once, so is less likely to inline it.

Demonstration using the LLVM Try Out page.

That said, code for readability first, micro-optimizations (and such tweaking is a micro-optimization) should only come after performance measures.


Example:

#include <cstdio>

// Prints i followed by i % 3 (no trailing newline).
// Declared static, i.e. with internal linkage, so every caller of foo is
// visible to the compiler within this translation unit.
static void foo(int i) {
  int m = i % 3;
  printf("%d %d", i, m);
}

// Calls foo once for each i in [0, argc); argv is not used.
int main(int argc, char* argv[]) {
  for (int i = 0; i != argc; ++i) {
    foo(i);  // the only call site of foo in this example
  }
}

Produces with static:

; ModuleID = '/tmp/webcompile/_27689_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

@.str = private constant [6 x i8] c"%d %d\00"     ; <[6 x i8]*> [#uses=1]

define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
  %cmp4 = icmp eq i32 %argc, 0                    ; <i1> [#uses=1]
  br i1 %cmp4, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
  %rem.i = srem i32 %0, 3                         ; <i32> [#uses=1]
  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
  %inc = add nsw i32 %0, 1                        ; <i32> [#uses=2]
  %exitcond = icmp eq i32 %inc, %argc             ; <i1> [#uses=1]
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}

declare i32 @printf(i8* nocapture, ...) nounwind

Without static:

; ModuleID = '/tmp/webcompile/_27859_0.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

@.str = private constant [6 x i8] c"%d %d\00"     ; <[6 x i8]*> [#uses=1]

define void @foo(int)(i32 %i) nounwind {
entry:
  %rem = srem i32 %i, 3                           ; <i32> [#uses=1]
  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %i, i32 %rem) ; <i32> [#uses=0]
  ret void
}

declare i32 @printf(i8* nocapture, ...) nounwind

define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
  %cmp4 = icmp eq i32 %argc, 0                    ; <i1> [#uses=1]
  br i1 %cmp4, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3]
  %rem.i = srem i32 %0, 3                         ; <i32> [#uses=1]
  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0]
  %inc = add nsw i32 %0, 1                        ; <i32> [#uses=2]
  %exitcond = icmp eq i32 %inc, %argc             ; <i1> [#uses=1]
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i32 0
}

这篇关于“现代 C++ 编译器会内联只被调用一次的函数吗?”的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆