为什么 strcmp 的返回值不同? [英] Why are the return values of strcmp different?

查看:159
本文介绍了“为什么 strcmp 的返回值不同?”这一问题的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

下面是一段 C 代码,我用 gcc 进行编译:

char *a="a";
char *d="d";
printf("%d\n", strcmp("a", "d"));
printf("%d\n", strcmp(a, "d"));
printf("%d\n", strcmp(a, d));

当我使用 -O 编译时,输出为

  -1
-3
-1

当我不使用 -O 编译时,输出为

  -1
-3
-3

为什么输出不同?strcmp 的代码是什么?
解决方案

  

为什么输出不同


因为真正重要的只是返回值的符号(正、负或零)。strcmp() 不需要返回 +1 或 -1,也不必返回一致的值。我猜测,在第一种和第三种情况下,编译器把对 strcmp() 的调用优化掉了,并直接把 -1 放到了返回值的位置;而在第二种情况下,我认为函数确实被调用了。


  

strcmp 的代码是什么?


从它看起来返回的是第一个不同字符的字符编码之差这一事实推断,我认为这是 glibc 的 strcmp():

int
 strcmp (p1, p2)
      const char *p1;
      const char *p2;
 {
   register const unsigned char *s1 = (const unsigned char *) p1;
   register const unsigned char *s2 = (const unsigned char *) p2;
   unsigned char c1, c2;

   do
     {
       c1 = (unsigned char) *s1++;
       c2 = (unsigned char) *s2++;
       if (c1 == '\0')
     return c1 - c2;
     }
   while (c1 == c2);

   return c1 - c2;
 }

编辑:@AndreyT 不相信我,所以这里给出 GCC 4.2 为我生成的汇编(OS X 10.7.5,64 位 Intel,默认优化级别——未加任何标志):

    .section    __TEXT,__text,regular,pure_instructions
    .globl  _main
    .align  4, 0x90
_main:
Leh_func_begin1:
    pushq   %rbp
Ltmp0:
    movq    %rsp, %rbp
Ltmp1:
    subq    $32, %rsp
Ltmp2:
    leaq    L_.str(%rip), %rax
    movq    %rax, -16(%rbp)
    leaq    L_.str1(%rip), %rax
    movq    %rax, -24(%rbp)
    movl    $-1, %ecx             ; <- THIS!
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rsi
    movq    %rsi, %rdi
    movl    %ecx, %esi
    movq    %rax, -32(%rbp)
    movb    %dl, %al
    callq   _printf               ; <- no call to `strcmp()` so far!
    movq    -16(%rbp), %rax
    movq    %rax, %rdi
    movq    -32(%rbp), %rsi
    callq   _strcmp               ; <- strcmp()
    movl    %eax, %ecx
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rdi
    movl    %ecx, %esi
    movb    %dl, %al
    callq   _printf               ; <- printf()
    movq    -16(%rbp), %rax
    movq    -24(%rbp), %rcx
    movq    %rax, %rdi
    movq    %rcx, %rsi
    callq   _strcmp               ; <- strcmp()
    movl    %eax, %ecx
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rdi
    movl    %ecx, %esi
    movb    %dl, %al
    callq   _printf               ; <- printf()
    movl    $0, -8(%rbp)
    movl    -8(%rbp), %eax
    movl    %eax, -4(%rbp)
    movl    -4(%rbp), %eax
    addq    $32, %rsp
    popq    %rbp
    ret
Leh_func_end1:

    .section    __TEXT,__cstring,cstring_literals
L_.str:
    .asciz   "a"

L_.str1:
    .asciz   "d"

L_.str2:
    .asciz   "%d\n"

    .section    __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame0:
Lsection_eh_frame:
Leh_frame_common:
Lset0 = Leh_frame_common_end-Leh_frame_common_begin
    .long   Lset0
Leh_frame_common_begin:
    .long   0
    .byte   1
    .asciz   "zR"
    .byte   1
    .byte   120
    .byte   16
    .byte   1
    .byte   16
    .byte   12
    .byte   7
    .byte   8
    .byte   144
    .byte   1
    .align  3
Leh_frame_common_end:
    .globl  _main.eh
_main.eh:
Lset1 = Leh_frame_end1-Leh_frame_begin1
    .long   Lset1
Leh_frame_begin1:
Lset2 = Leh_frame_begin1-Leh_frame_common
    .long   Lset2
Ltmp3:
    .quad   Leh_func_begin1-Ltmp3
Lset3 = Leh_func_end1-Leh_func_begin1
    .quad   Lset3
    .byte   0
    .byte   4
Lset4 = Ltmp0-Leh_func_begin1
    .long   Lset4
    .byte   14
    .byte   16
    .byte   134
    .byte   2
    .byte   4
Lset5 = Ltmp1-Ltmp0
    .long   Lset5
    .byte   13
    .byte   6
    .align  3
Leh_frame_end1:


.subsections_via_symbols

以及原始源代码:

#include <stdio.h>
#include <string.h>

int main()
{
    const char *a = "a";
    const char *d = "d";
    printf("%d\n", strcmp("a", "d"));
    printf("%d\n", strcmp(a, "d"));
    printf("%d\n", strcmp(a, d));

    return 0;
}

以及它产生的输出(附截图作为更有力的证明):

Here is the C code and I compiled with gcc

char *a="a";
char *d="d";
printf("%d\n", strcmp("a", "d"));
printf("%d\n", strcmp(a, "d"));
printf("%d\n", strcmp(a, d));

When I compiled with -O the output is

-1
-3
-1

When I compiled without -O then output is

-1
-3
-3

Why the output is different and what is the code of strcmp?

解决方案

Why the output is different

Because all that matters is the sign (positive, negative or zero) of the return value. strcmp() is not required to return +1 or -1, nor does it have to return consistent values. I suspect that in the first and third case, the compiler optimizes away the call to strcmp() and puts -1 into the place of the return value. In the second case, I think the function is actually called.

what is the code of strcmp?

Deducing from the fact that it seemingly returns the difference between the character codes of the first differing character, I'd say this is glibc's strcmp():

/*
 * Compare the NUL-terminated strings p1 and p2 byte by byte.
 *
 * Returns the difference between the first pair of bytes that differ,
 * with both bytes read as unsigned char. The result is therefore 0 when
 * the strings are equal, negative when p1 sorts first and positive when
 * p2 sorts first; its magnitude is the byte difference, not just +/-1.
 *
 * The definition uses old-style (K&R) parameter declarations.
 */
int
 strcmp (p1, p2)
      const char *p1;
      const char *p2;
 {
   /* Walk both strings through unsigned char pointers so that bytes
      above 0x7f compare as positive values regardless of whether plain
      char is signed. */
   register const unsigned char *s1 = (const unsigned char *) p1;
   register const unsigned char *s2 = (const unsigned char *) p2;
   unsigned char c1, c2;

   do
     {
       /* Fetch the next byte from each string and advance. */
       c1 = (unsigned char) *s1++;
       c2 = (unsigned char) *s2++;
       /* End of p1: stop here so neither pointer reads past the
          terminator. The result is 0 if p2 ended too, negative
          otherwise. */
       if (c1 == '\0')
     return c1 - c2;
     }
   while (c1 == c2);

   /* First mismatch: both operands are promoted to int, so the
      subtraction yields a signed difference. */
   return c1 - c2;
 }

Edit: @AndreyT doesn't believe me, so here's the assembly GCC 4.2 generated for me (OS X 10.7.5 64-bit Intel, default optimization level - no flags):

    .section    __TEXT,__text,regular,pure_instructions
    .globl  _main
    .align  4, 0x90
; Compiler-generated code for main() of the demo program shown after this listing.
; Stack slots used below: -16(%rbp) holds the address of L_.str ("a"),
; -24(%rbp) and -32(%rbp) both hold the address of L_.str1 ("d").
_main:
Leh_func_begin1:
    ; Prologue: save the caller's frame pointer and reserve 32 bytes of stack.
    pushq   %rbp
Ltmp0:
    movq    %rsp, %rbp
Ltmp1:
    subq    $32, %rsp
Ltmp2:
    ; Store the addresses of the two string constants in their stack slots.
    leaq    L_.str(%rip), %rax
    movq    %rax, -16(%rbp)
    leaq    L_.str1(%rip), %rax
    movq    %rax, -24(%rbp)
    ; First printf: the value to print is the immediate -1; no strcmp call is made.
    movl    $-1, %ecx             ; <- THIS!
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rsi
    movq    %rsi, %rdi
    movl    %ecx, %esi
    ; %rax still holds the address of L_.str1; keep a second copy at -32(%rbp).
    movq    %rax, -32(%rbp)
    ; %al is zeroed before each printf call (presumably the vector-register
    ; count for a variadic call; confirm against the platform ABI).
    movb    %dl, %al
    callq   _printf               ; <- no call to `strcmp()` so far!
    ; Second printf: real call to strcmp with the -16(%rbp) and -32(%rbp) pointers.
    movq    -16(%rbp), %rax
    movq    %rax, %rdi
    movq    -32(%rbp), %rsi
    callq   _strcmp               ; <- strcmp()
    ; Pass the strcmp result (%eax) to printf as the value to print.
    movl    %eax, %ecx
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rdi
    movl    %ecx, %esi
    movb    %dl, %al
    callq   _printf               ; <- printf()
    ; Third printf: real call to strcmp with the -16(%rbp) and -24(%rbp) pointers.
    movq    -16(%rbp), %rax
    movq    -24(%rbp), %rcx
    movq    %rax, %rdi
    movq    %rcx, %rsi
    callq   _strcmp               ; <- strcmp()
    movl    %eax, %ecx
    xorb    %dl, %dl
    leaq    L_.str2(%rip), %rdi
    movl    %ecx, %esi
    movb    %dl, %al
    callq   _printf               ; <- printf()
    ; Return value 0, copied through two stack temporaries into %eax.
    movl    $0, -8(%rbp)
    movl    -8(%rbp), %eax
    movl    %eax, -4(%rbp)
    movl    -4(%rbp), %eax
    ; Epilogue: release the stack frame and return.
    addq    $32, %rsp
    popq    %rbp
    ret
Leh_func_end1:

    .section    __TEXT,__cstring,cstring_literals
L_.str:
    .asciz   "a"

L_.str1:
    .asciz   "d"

L_.str2:
    .asciz   "%d\n"

    .section    __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame0:
Lsection_eh_frame:
Leh_frame_common:
Lset0 = Leh_frame_common_end-Leh_frame_common_begin
    .long   Lset0
Leh_frame_common_begin:
    .long   0
    .byte   1
    .asciz   "zR"
    .byte   1
    .byte   120
    .byte   16
    .byte   1
    .byte   16
    .byte   12
    .byte   7
    .byte   8
    .byte   144
    .byte   1
    .align  3
Leh_frame_common_end:
    .globl  _main.eh
_main.eh:
Lset1 = Leh_frame_end1-Leh_frame_begin1
    .long   Lset1
Leh_frame_begin1:
Lset2 = Leh_frame_begin1-Leh_frame_common
    .long   Lset2
Ltmp3:
    .quad   Leh_func_begin1-Ltmp3
Lset3 = Leh_func_end1-Leh_func_begin1
    .quad   Lset3
    .byte   0
    .byte   4
Lset4 = Ltmp0-Leh_func_begin1
    .long   Lset4
    .byte   14
    .byte   16
    .byte   134
    .byte   2
    .byte   4
Lset5 = Ltmp1-Ltmp0
    .long   Lset5
    .byte   13
    .byte   6
    .align  3
Leh_frame_end1:


.subsections_via_symbols

And the original source code:

#include <stdio.h>
#include <string.h>

/*
 * Demonstration program: prints the result of comparing "a" with "d"
 * three times. The three calls differ only in whether each argument is a
 * string literal or a pointer variable, which is what lets the printed
 * values differ between builds. Only the sign of each printed value is
 * meaningful.
 */
int main()
{
    const char *a = "a";
    const char *d = "d";
    /* Both arguments are string literals. */
    printf("%d\n", strcmp("a", "d"));
    /* First argument is a pointer variable, second is a string literal. */
    printf("%d\n", strcmp(a, "d"));
    /* Both arguments are pointer variables. */
    printf("%d\n", strcmp(a, d));

    return 0;
}

And the output it generated (screenshot for having a better proof):

这篇关于“为什么 strcmp 的返回值不同?”的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆