EDX-EAX寄存器对的除法导致商数很大 [英] EDX-EAX register pair division resulting in big quotient

查看:124
本文介绍了EDX-EAX寄存器对的划分导致商数很大的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如果我在 EDX-EAX 中有一个 64位数字,并且我将其除以一个相对较小的数字,商可能会变成大于 32位的数字. 那么,此时 div 指令仅设置进位标志吗?

我的问题是,我想在 EDX-EAX 中处理一个数字并将其写出一位数字,因此在这种情况下,我必须将 EDX-EAX 中的数字除以 10 以获得最后一位数字.

解决方案

否. 64b/32b 的 DIV 的最大商为 $2^{32}-1$.

溢出是通过#DE(除法错误)异常而不是CF标志来表示的.

如果该 64b 数有一定的上限,并未用满 64b(例如最大为 $2^{61}$),则可以先用 div 将其除以 $10^9$(小于 $2^{32}$ 且最接近它的 10 的幂),然后再对两个"半部分"分别执行 div 10. 但是正如 Jester 所指出的那样,64b div 非常慢,改用减去 10 的幂(sub)的做法听起来是更好的主意,并且代码也会更简单.


既然它如此简单,那为什么不把代码也加上呢?大约 5 分钟……到 60 分钟之后(我对它并不十分满意,我认为可以用更短的代码、以更优雅的方式完成……两种情况下我都没有考虑性能;这段代码肯定还可以优化,至少可以在关键位置对齐循环,但它至少能正常工作,因此可以作为您的"参考版本"用于比较/验证)...

NASM 32b linux可执行文件,保存到uint64toascii.asm
构建:nasm -f elf *.asm; ld -m elf_i386 -s -o uint64toascii *.o

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point

    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert unsigned 64-bit integer to decimal ASCII (no terminator).
    ; In:   edx:eax = value, edi = output buffer (at least 20 bytes)
    ; Out:  edi = address just past the last digit written
    ; Keeps eax, edx, esi, ebx, ebp, ecx (saved on stack); flags change.
    ; Uses pow10table: descending 64-bit powers of ten, ended by a 0 entry.
    push    eax
    push    edx
    push    esi
    push    ebx
    push    ebp
    push    ecx
    ; value == 0 produces the single character '0'
    mov     esi,eax
    or      esi,edx
    jz      .emitZero
    ; find the first table entry that is <= value (skips leading zeroes)
    mov     esi,pow10table
    jmp     .probePower
.tooBig:
    add     esi,8               ; this 10^k is above the value, try next
.probePower:
    cmp     edx,[esi+4]         ; unsigned compare of high dwords first
    jb      .tooBig
    ja      .loadPower
    cmp     eax,[esi]           ; high dwords equal: low dwords decide
    jb      .tooBig
    ; from here on every remaining power of ten yields one output digit
.loadPower:
    mov     ebx,[esi]
    mov     ebp,[esi+4]         ; ebp:ebx = current 10^k
    test    ebx,ebx
    jz      .done               ; low dword 0 occurs only in the terminator
    add     esi,8               ; esi -> entry for the following digit
    mov     cl,'0'              ; cl = ASCII digit being counted
.tryFit:
    cmp     edx,ebp
    jb      .storeDigit         ; value < 10^k: digit is final
    ja      .takeOne
    cmp     eax,ebx
    jb      .storeDigit
.takeOne:
    sub     eax,ebx             ; edx:eax -= 10^k
    sbb     edx,ebp
    inc     cl
    jmp     .tryFit
.storeDigit:
    mov     [edi],cl
    inc     edi
    jmp     .loadPower

.emitZero:
    mov     byte [edi],'0'
    inc     edi

.done:
    pop     ecx
    pop     ebp
    pop     ebx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^0.
; integer64btoascii walks the table in 8-byte steps and stops at the
; zero entry (the only entry whose low dword is 0).
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
    dq  1
    dq  0       ; terminator

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input (special-cased by the routine)
    dd  0, 1        ; 2^32 = 4294967296 (low dword 0, high dword 1)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  9999999999999999999     ; nineteen 9s
    dq  101001000100101         ; zero digits between non-zero ones
testnumbersEND:

您可以在 http://www.tutorialspoint.com/compile_assembly_online.php上在线试用它. (在此处复制源代码)


因为我不太喜欢第一个版本,所以我一直在玩弄它,主要是尝试实现简短的(LoC)代码,而不是特别在意性能或代码大小(懒得衡量任何东西,除了编辑器中的行数.

构建命令行+实时演示与以前的情况相同

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point

    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert an unsigned 64-bit integer to decimal ASCII (no terminator).
    ; edx:eax = number to convert, edi = buffer to output (at least 20B)
    ; returns edi pointing after last character
    ; eax, edx, esi and ecx are saved and restored; flags are modified.
    ; Method: for every 10^k in pow10table (largest first) count how many
    ; times it can be subtracted from edx:eax; the count is the digit.
    ; Each digit is stored at [edi], but edi only advances once a digit
    ; >= ch has been seen, so leading zeroes are overwritten.
    push    eax
    push    edx
    push    esi
    push    ecx
    mov     ch,'1'          ; test value for skipping leading zeroes
    mov     esi,pow10table
.nextPow10:                 ; [esi+4]:[esi] = 10^k
    mov     cl,'0'-1        ; loop below runs digit+1 times => cl = '0'+digit
.countPow10:                ; subtract 10^k from edx:eax + count it
    sub     eax,[esi]
    sbb     edx,[esi+4]     ; CF=1 when the 64-bit subtraction borrowed
    inc     cl              ; preserves CF
    jnc     .countPow10
    ; subtraction overflow, did "one too many" of them
    add     eax,[esi]       ; restore edx:eax to previous value
    adc     edx,[esi+4]
    cmp     cl,ch           ; CF=1 when cl < ch (digit to be skipped)
    mov     [edi],cl        ; write the digit into output (mov keeps flags)
    sbb     edi,-1          ; edi += 1-CF: advance edi as needed (when cl>=ch)
    cmp     cl,ch           ; same compare again, sbb above changed the flags
    lea     esi,[esi+8]     ; next power of 10 (lea keeps flags)
    adc     ch,-1           ; ch += CF-1: ch drops below '0' once a digit was kept,
                            ; so every following digit is kept (disables zero skip)
    cmp     esi,pow10tableEND
    jb      .nextPow10      ; until all powers of 10 were processed
    cmp     ch,'1'          ; ch still '1' => nothing kept => edx:eax == 0, CF=0
    sbb     edi,-1          ; advance edi when CF=0, keeping the '0' stored last
    pop     ecx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^0.
; integer64btoascii walks the table in 8-byte steps until it reaches
; pow10tableEND.
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
    dq  1
pow10tableEND:

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input, expected output is a single '0'
    dd  0, 1        ; 2^32 = 4294967296 (eax = 0)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  10000000000000000000    ; largest 10^k to fit into 64b
    dq  9999999999999999999     ; to verify "9"
    dq  10200300040000500000    ; to verify "0" in-between/at-end
testnumbersEND:

顺便说一句,如果 CPU 不会因为 ch 与 cl 的部分寄存器(partial-register)冲突而严重停顿(恕我直言应该不会,因为两者的更新相隔一段距离,只会很少发生冲突),那么我相信第二版的性能会比第一版更好,因为分支大大简化了.

但是,"相信"是这里的关键词:如果您追求性能,请进行性能分析(profile)!(并在关键循环上使用"align 8 或 16(或者也许只用 4)",通过汇编列表和性能测量验证哪一种更好)


还有一个版本,其前导零测试逻辑可能更容易理解;此外,它可以轻松扩展到 128b 整数(ebx 和 ebp 未被使用,因此它们可以容纳输入数字的另外 64b),也可以扩展到其他任意位数——此时数字和减法运算都放在内存中(因为 256b 在 32b x86 模式下放不进寄存器). 也可以将其修改为在 64b 模式下工作,借助更多的寄存器只需很少的改动(当然,所有这些都需要大得多的 $10^k$ 幂表,一直到所需位数对应的最大幂).

我之所以发布这个版本,是因为我对最终解决"零"难题的方式感到非常高兴——这个难题困扰了我整个周末.

最后,确实存在一种优雅(且更快)的解决方案:在处理完 $10^1$ 次幂之后就退出减法循环,这样 eax 中剩下的值为 0-9. 然后直接把该值写入输出,无需任何"跳过"测试:对于非零数字,它本来就属于正确的输出;对于 edx:eax == 0 的输入,它会生成单个 '0' 字符. 咳,早该想到!

另外,我设法修改了"跳过前导零"逻辑,使其对产生的任意位数的数字都有效,而不再仅限于 48 位数字.

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point
    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert an unsigned 64-bit integer to decimal ASCII (no terminator).
    ; edx:eax = number to convert, edi = buffer to output (at least 20B)
    ; returns edi pointing after last character
    ; eax, edx, esi and ecx are saved and restored; flags are modified.
    ; Method: for every 10^k in pow10table (10^19 .. 10^1) count how many
    ; times it can be subtracted from edx:eax; the count is the digit.
    ; ch accumulates (by OR) all digits seen so far; while it is still '0'
    ; the stored digit is a leading zero and edi is not advanced. The
    ; remainder 0..9 left in eax is always emitted as the final digit.
    push    eax
    push    edx
    push    esi
    push    ecx
    mov     ch,'0'          ; test value to detect leading zeroes
    mov     esi,pow10table
.nextPow10:                 ; [esi+4]:[esi] = 10^k
    mov     cl,'0'-1        ; count of 10^k power in ASCII digit
.countPow10:                ; subtract 10^k from edx:eax + count it
    sub     eax,[esi]
    sbb     edx,[esi+4]     ; CF=1 when the 64-bit subtraction borrowed
    inc     cl              ; preserves CF
    jnc     .countPow10     ; loop till subtraction overflows
    ; subtraction overflow, did "one too many" of them
    or      ch,cl           ; merge digit into test_leading_zeroes
    add     eax,[esi]       ; restore edx:eax to previous value
    adc     edx,[esi+4]
    cmp     ch,'1'          ; test is still '0'? => CF=1
    mov     [edi],cl        ; write the digit into output (mov keeps flags)
    lea     esi,[esi+8]     ; next power of 10 (lea keeps flags)
    sbb     edi,-1          ; edi += 1-CF: advance edi as needed (test value > '0')
    cmp     esi,pow10tableEND
    jb      .nextPow10      ; until all table powers of 10 were processed
    or      al,'0'          ; remaining eax = 0..9, convert to ASCII
    mov     [edi],al        ; store last digit
    inc     edi             ; last digit will advance edi always
    pop     ecx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^1.
; 10^0 is intentionally absent: integer64btoascii emits the final digit
; directly from the remainder after reaching pow10tableEND.
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
pow10tableEND:

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input, expected output is a single '0'
    dd  0, 1        ; 2^32 = 4294967296 (eax = 0)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  10000000000000000000    ; largest 10^k to fit into 64b
    dq  9999999999999999999     ; to verify "9"
    dq  10200300040000500000    ; to verify "0" in-between/at-end
testnumbersEND:

为了获得更好的性能,应该有可能进行求逆值(倒数)乘法,以得到div 10乘以imul的方法,但是那太麻烦了.

这三个版本可能很简单,以至于一些学习Assembly的人都可以理解它们,并且它们还说明了几天后的思维进度.

If I have a 64 bit number in the EDX-EAX, and I divide it with a relatively small number, the quotient may become a number bigger than 32 bits. So at that point the div operator only sets the carry flag?

My problem is, that I would like to process a number in the EDX-EAX and write it out digit per digit, so in this case I would have to divide the number in EDX-EAX by 10 to get the last digit.

解决方案

No. DIV in 64b/32b has maximum quotient $2^{32}-1$.

Overflow is indicated with the #DE (divide error) exception rather than with the CF flag.

If there's some limit for the 64b number, so that it does not use the full 64b (like $2^{61}$ max), then you may first split it by div $10^9$ (the nearest power of 10 below $2^{32}$) and then do the two "halves" separately by div 10. But as Jester noted, the 64b div is so slow that doing the sub with powers of 10 sounds like a better idea, and the code will be simpler too.


because if it's so simple, then why not to add the code, right? Will be ~5min ... ~60min later (and I'm not very happy about it, I think it may be done in a bit more elegant way with shorter code ... not bothering with performance in either case, this one can be optimized for sure, at minimal to align loops where it matters, but it at least works, so it can be your "reference version" to compare/verify with)...

NASM 32b linux executable, save into uint64toascii.asm
to build: nasm -f elf *.asm; ld -m elf_i386 -s -o uint64toascii *.o

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point

    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert unsigned 64-bit integer to decimal ASCII (no terminator).
    ; In:   edx:eax = value, edi = output buffer (at least 20 bytes)
    ; Out:  edi = address just past the last digit written
    ; Keeps eax, edx, esi, ebx, ebp, ecx (saved on stack); flags change.
    ; Uses pow10table: descending 64-bit powers of ten, ended by a 0 entry.
    push    eax
    push    edx
    push    esi
    push    ebx
    push    ebp
    push    ecx
    ; value == 0 produces the single character '0'
    mov     esi,eax
    or      esi,edx
    jz      .emitZero
    ; find the first table entry that is <= value (skips leading zeroes)
    mov     esi,pow10table
    jmp     .probePower
.tooBig:
    add     esi,8               ; this 10^k is above the value, try next
.probePower:
    cmp     edx,[esi+4]         ; unsigned compare of high dwords first
    jb      .tooBig
    ja      .loadPower
    cmp     eax,[esi]           ; high dwords equal: low dwords decide
    jb      .tooBig
    ; from here on every remaining power of ten yields one output digit
.loadPower:
    mov     ebx,[esi]
    mov     ebp,[esi+4]         ; ebp:ebx = current 10^k
    test    ebx,ebx
    jz      .done               ; low dword 0 occurs only in the terminator
    add     esi,8               ; esi -> entry for the following digit
    mov     cl,'0'              ; cl = ASCII digit being counted
.tryFit:
    cmp     edx,ebp
    jb      .storeDigit         ; value < 10^k: digit is final
    ja      .takeOne
    cmp     eax,ebx
    jb      .storeDigit
.takeOne:
    sub     eax,ebx             ; edx:eax -= 10^k
    sbb     edx,ebp
    inc     cl
    jmp     .tryFit
.storeDigit:
    mov     [edi],cl
    inc     edi
    jmp     .loadPower

.emitZero:
    mov     byte [edi],'0'
    inc     edi

.done:
    pop     ecx
    pop     ebp
    pop     ebx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^0.
; integer64btoascii walks the table in 8-byte steps and stops at the
; zero entry (the only entry whose low dword is 0).
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
    dq  1
    dq  0       ; terminator

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input (special-cased by the routine)
    dd  0, 1        ; 2^32 = 4294967296 (low dword 0, high dword 1)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  9999999999999999999     ; nineteen 9s
    dq  101001000100101         ; zero digits between non-zero ones
testnumbersEND:

You can try it live on web at http://www.tutorialspoint.com/compile_assembly_online.php (copy the source there)


And because I didn't like that first version too much, I kept playing with it a bit, mostly trying to achieve a short (LoC) code, not particularly caring about performance or code size (too lazy to measure anything except number of lines in editor).

Build command line + live demo same as in previous case:

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point

    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert an unsigned 64-bit integer to decimal ASCII (no terminator).
    ; edx:eax = number to convert, edi = buffer to output (at least 20B)
    ; returns edi pointing after last character
    ; eax, edx, esi and ecx are saved and restored; flags are modified.
    ; Method: for every 10^k in pow10table (largest first) count how many
    ; times it can be subtracted from edx:eax; the count is the digit.
    ; Each digit is stored at [edi], but edi only advances once a digit
    ; >= ch has been seen, so leading zeroes are overwritten.
    push    eax
    push    edx
    push    esi
    push    ecx
    mov     ch,'1'          ; test value for skipping leading zeroes
    mov     esi,pow10table
.nextPow10:                 ; [esi+4]:[esi] = 10^k
    mov     cl,'0'-1        ; loop below runs digit+1 times => cl = '0'+digit
.countPow10:                ; subtract 10^k from edx:eax + count it
    sub     eax,[esi]
    sbb     edx,[esi+4]     ; CF=1 when the 64-bit subtraction borrowed
    inc     cl              ; preserves CF
    jnc     .countPow10
    ; subtraction overflow, did "one too many" of them
    add     eax,[esi]       ; restore edx:eax to previous value
    adc     edx,[esi+4]
    cmp     cl,ch           ; CF=1 when cl < ch (digit to be skipped)
    mov     [edi],cl        ; write the digit into output (mov keeps flags)
    sbb     edi,-1          ; edi += 1-CF: advance edi as needed (when cl>=ch)
    cmp     cl,ch           ; same compare again, sbb above changed the flags
    lea     esi,[esi+8]     ; next power of 10 (lea keeps flags)
    adc     ch,-1           ; ch += CF-1: ch drops below '0' once a digit was kept,
                            ; so every following digit is kept (disables zero skip)
    cmp     esi,pow10tableEND
    jb      .nextPow10      ; until all powers of 10 were processed
    cmp     ch,'1'          ; ch still '1' => nothing kept => edx:eax == 0, CF=0
    sbb     edi,-1          ; advance edi when CF=0, keeping the '0' stored last
    pop     ecx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^0.
; integer64btoascii walks the table in 8-byte steps until it reaches
; pow10tableEND.
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
    dq  1
pow10tableEND:

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input, expected output is a single '0'
    dd  0, 1        ; 2^32 = 4294967296 (eax = 0)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  10000000000000000000    ; largest 10^k to fit into 64b
    dq  9999999999999999999     ; to verify "9"
    dq  10200300040000500000    ; to verify "0" in-between/at-end
testnumbersEND:

BTW, if the CPU will not stall too much over partial-reg ch vs cl clashing (IMHO shouldn't, as the update of value is a bit apart and will collide only rarely), then I believe the second version will perform better than first one, as the branching is lot more simplified.

But "believe" is the keyword here, if you are after performance, profile! (and use "align 8 or 16 (or maybe just 4)" on key loops, verify listing+performance which one is better)


One more version, in which the leading-zeroes test logic is probably simpler to understand; plus it can be extended to 128b integers easily (ebx and ebp are spared, so they may hold another 64b of the input number), and to any other arbitrary number of bits when the number and the subtraction are kept in memory (as 256b will not fit into registers in 32b x86 mode). It can also be modified to work in 64b mode with more registers with only a few changes (of course all of these require a much larger table of $10^k$ powers, up to the maximum one for the desired number of bits).

I'm posting this one, as I got really happy about how I solved the "zero" puzzle finally - which was annoying me whole weekend.

In the end an elegant (and faster) solution exists: just exit the subtraction loop right after the $10^1$ power, so eax is left with a value 0-9. Then just write that value into the output without any "skip" test: in the case of a non-zero number it belongs to the proper output, and in the case of edx:eax == 0 input it will create the single '0' char. Duh!

Plus I managed to change "skip leading zeroes" logic to survive any number of digits produced, not only 48.

section .text
    global _start       ; exported so ld can use it as the entry point

; ---------------------------------------------------------------------
; _start - test driver (32-bit Linux, int 0x80 syscall convention)
; For every 64-bit value in [testnumbers, testnumbersEND) converts it to
; decimal ASCII with integer64btoascii, appends '\n' and writes the line
; to stdout. Exits with status 0.
; ---------------------------------------------------------------------
_start:                 ; linker entry point
    ; allocate 24B temporary buffer for ASCII number
    ; (at most 20 digits + 1 newline = 21 bytes are stored)
    sub     esp,24
    ; output test numbers in loop
    mov     esi,testnumbers     ; esi = address of current test value
testNumbersLoop:
    mov     eax,[esi]           ; low 32 bits
    mov     edx,[esi+4]         ; high 32 bits
    mov     edi,esp             ; edi = start of output buffer
    ; call the routine (returns edi = one past the last digit)
    call    integer64btoascii
    ; add new line to output
    mov     [edi],byte 10
    inc     edi
    ; display number string
    mov     edx,edi
    sub     edx,esp     ; output length
    mov     ecx,esp     ; output buffer address
    mov     ebx,1       ; file descriptor (stdout)
    mov     eax,4       ; system call number (sys_write)
    int     0x80        ; call kernel
    ; loop through test numbers
    add     esi,8
    cmp     esi,testnumbersEND
    jb      testNumbersLoop
    ; exit
    add     esp,24      ; release temporary buffer
    xor     ebx,ebx     ; exit status 0 (ebx still held 1 = stdout fd)
    mov     eax, 1      ; system call number (sys_exit)
    int     0x80        ; call kernel

integer64btoascii:
    ; Convert an unsigned 64-bit integer to decimal ASCII (no terminator).
    ; edx:eax = number to convert, edi = buffer to output (at least 20B)
    ; returns edi pointing after last character
    ; eax, edx, esi and ecx are saved and restored; flags are modified.
    ; Method: for every 10^k in pow10table (10^19 .. 10^1) count how many
    ; times it can be subtracted from edx:eax; the count is the digit.
    ; ch accumulates (by OR) all digits seen so far; while it is still '0'
    ; the stored digit is a leading zero and edi is not advanced. The
    ; remainder 0..9 left in eax is always emitted as the final digit.
    push    eax
    push    edx
    push    esi
    push    ecx
    mov     ch,'0'          ; test value to detect leading zeroes
    mov     esi,pow10table
.nextPow10:                 ; [esi+4]:[esi] = 10^k
    mov     cl,'0'-1        ; count of 10^k power in ASCII digit
.countPow10:                ; subtract 10^k from edx:eax + count it
    sub     eax,[esi]
    sbb     edx,[esi+4]     ; CF=1 when the 64-bit subtraction borrowed
    inc     cl              ; preserves CF
    jnc     .countPow10     ; loop till subtraction overflows
    ; subtraction overflow, did "one too many" of them
    or      ch,cl           ; merge digit into test_leading_zeroes
    add     eax,[esi]       ; restore edx:eax to previous value
    adc     edx,[esi+4]
    cmp     ch,'1'          ; test is still '0'? => CF=1
    mov     [edi],cl        ; write the digit into output (mov keeps flags)
    lea     esi,[esi+8]     ; next power of 10 (lea keeps flags)
    sbb     edi,-1          ; edi += 1-CF: advance edi as needed (test value > '0')
    cmp     esi,pow10tableEND
    jb      .nextPow10      ; until all table powers of 10 were processed
    or      al,'0'          ; remaining eax = 0..9, convert to ASCII
    mov     [edi],al        ; store last digit
    inc     edi             ; last digit will advance edi always
    pop     ecx
    pop     esi
    pop     edx
    pop     eax
    ret

section .rodata

; Powers of ten as 64-bit little-endian values, from 10^19 down to 10^1.
; 10^0 is intentionally absent: integer64btoascii emits the final digit
; directly from the remainder after reaching pow10tableEND.
pow10table:
    dq  10000000000000000000
    dq  1000000000000000000
    dq  100000000000000000
    dq  10000000000000000
    dq  1000000000000000
    dq  100000000000000
    dq  10000000000000
    dq  1000000000000
    dq  100000000000
    dq  10000000000
    dq  1000000000
    dq  100000000
    dq  10000000
    dq  1000000
    dq  100000
    dq  10000
    dq  1000
    dq  100
    dq  10
pow10tableEND:

; 64-bit test inputs printed by _start; testnumbersEND marks the end.
testnumbers:
    dq  ~0          ; max 2^64-1 = 18446744073709551615
    dq  0           ; zero input, expected output is a single '0'
    dd  0, 1        ; 2^32 = 4294967296 (eax = 0)
    dq  1234567890  ; < 2^32 (edx = 0)
    dq  10000000000000000000    ; largest 10^k to fit into 64b
    dq  9999999999999999999     ; to verify "9"
    dq  10200300040000500000    ; to verify "0" in-between/at-end
testnumbersEND:

For better performance it should be possible to do the inversion-value (reciprocal) multiplications to get div 10 by imul, but that's over my head.

These three versions are probably simple enough for somebody learning Assembly to understand them, plus they illustrate the progress of mind over couple of days.

这篇关于EDX-EAX寄存器对的划分导致商数很大的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆