更快的替代InttoStr / StrToInt? [英] faster alternative to InttoStr/StrToInt?
问题描述
我想知道有没有比 System.IntToStr / System.StrToInt
更快的选择。有一个快速版本,但只有UTF8。这是 Int32ToUTF8
从 SynCommons.pas
,由于字符串转换速度缓慢。
I wonder if there are faster alternative than System.IntToStr / System.StrToInt
. There is a fast version but only UTF8. Which is Int32ToUTF8
from SynCommons.pas
and due to slow string conversions it is bound to be slow. The purepascal RTL versions are really slow for 64 bit.
推荐答案
此例程比RTL中的例程快约40%。如果您使用WideChar []缓冲区可能会更快,因为字符串分配占用转换例程所用时间的75%:
This routine is approximately 40% faster than the routine in the RTL. It could be much faster if you worked with WideChar[] buffers because the string allocation is taking up 75% of the time used by the conversion routine:
- IntS32ToStr:24 ,77 ns / item(RTL替换)
IntLeStr:34,51 ns / item(RTL)
请注意,下面的例程使用SSE2,只有x86和x64版本被完全实现,
Please note that the routine below uses SSE2 and only x86 and x64 versions are fully implemented and tested.
在初始化中:
function IntU32ToWide( X: Longword; P: PWideChar ): PWideChar; register;
function IntS32ToWide( X: Integer; P: PWideChar ): PWideChar; register;
function IntS32ToStr ( X: Longword ): UnicodeString; register; inline;
在实现中:
{$CODEALIGN 16}
{$ALIGN 16}
const
DigitsClippedW: array [ 0..99 ] of LongWord = (
$000030, $000031, $000032, $000033, $000034, $000035, $000036, $000037, $000038, $000039,
$300031, $310031, $320031, $330031, $340031, $350031, $360031, $370031, $380031, $390031,
$300032, $310032, $320032, $330032, $340032, $350032, $360032, $370032, $380032, $390032,
$300033, $310033, $320033, $330033, $340033, $350033, $360033, $370033, $380033, $390033,
$300034, $310034, $320034, $330034, $340034, $350034, $360034, $370034, $380034, $390034,
$300035, $310035, $320035, $330035, $340035, $350035, $360035, $370035, $380035, $390035,
$300036, $310036, $320036, $330036, $340036, $350036, $360036, $370036, $380036, $390036,
$300037, $310037, $320037, $330037, $340037, $350037, $360037, $370037, $380037, $390037,
$300038, $310038, $320038, $330038, $340038, $350038, $360038, $370038, $380038, $390038,
$300039, $310039, $320039, $330039, $340039, $350039, $360039, $370039, $380039, $390039 );
// Delphi XE3 has no working alignment for 16 bytes for data but it has alignment for 16 bytes for code!
// So we encode our constants as a procedure and use constant offsets to the data.
const
Div10000_Shl45d = $00;
Shl16_minus_10000d = $10;
Div_1000_100_10_1w = $20;
Shl_1000_100_10_1w = $30;
Mul_10w = $40;
To_Asciiw = $50;
Mul_10000d = $60;
Div100_Shl19w = $70;
Mul100w = $80;
Div10_shl16w = $90;
To_Asciib = $A0;
procedure IntUToStrConsts();
asm
{$if defined( CPUX64 )}.NOFRAME{$ifend}
dd $d1b71759, $d1b71759, $d1b71759, $d1b71759; // RoundUp( 2^45 / 10000 )
dd $10000 - 10000, $10000 - 10000, $10000 - 10000, $10000 - 10000; // 1 shl 16 - 1e4
dw 8389, 5243, 13108, $8000, 8389, 5243, 13108, $8000; // 1000 100 10 1 div
dw 1 shl 7, 1 shl 11, 1 shl 13, 1 shl 15, 1 shl 7, 1 shl 11, 1 shl 13, 1 shl 15; // 1000 100 10 1 shr
dw 10, 10, 10, 10, 10, 10, 10, 10; // 10
dw $30, $30, $30, $30, $30, $30, $30, $30; // To Unicode / ASCII
dd 10000, 10000, 10000, 10000; // 10000
dw $147b, $147b, $147b, $147b, $147b, $147b, $147b, $147b // RoundUp( 2^19 / 100 )
dw 100, 100, 100, 100, 100, 100, 100, 100 // 100
dw $199a, $199a, $199a, $199a, $199a, $199a, $199a, $199a // RoundUp( 2^16 / 10 )
dd $30303030, $30303030, $30303030, $30303030 // To bytewise / ASCII
end;
function IntS32ToStr( X: Longword ): UnicodeString; register;
var
P, Q: PWideChar;
begin
SetLength( Result, 11 );
P := PWideChar( Pointer( Result ) );
// Full string buffer and set the length of the string with no resizing!
PLongword( ( NativeInt( Result ) - sizeof( Longword ) ) )^ := IntS32ToWide( X, P ) - P;
end;
function IntS32ToWide( X: Integer; P: PWideChar ): PWideChar;
{$if defined( CPUX86 )}
asm // eax = X, edx = P
cmp eax, 0
jge IntU32ToWide
mov word ptr [ edx ], Word( '-' )
neg eax
lea edx, [ edx + 2 ]
jmp IntU32ToWide
end;
{$else if defined( CPUX64 )}
asm // ecx = X, rdx = P
.NOFRAME
cmp ecx, 0
jge IntU32ToWide
mov word ptr [ rdx ], Word( '-' )
neg ecx
lea rdx, [ rdx + 2 ]
jmp IntU32ToWide
end;
{$else}
begin
if X >= 0 then begin
Result := IntU32ToWide( Longword( X ), P );
end else begin
P^ := '-';
Result := IntU32ToWide( Longword( -X ), P + 1 );
end;
end;
{$ifend}
function IntU32ToWide( X: Longword; P: PWideChar ): PWideChar; register;
{$if defined( CPUX86 )}
asm
cmp eax, 100000000
jb @Medium
@Large:
push edx
xor edx, edx
mov ecx, 100000000
div ecx
pop ecx
// eax = high one or two digit value, edx = 8 digit value, ecx = pointer
// Emit the first 2 digits
mov eax, dword ptr [ DigitsClippedW + eax * 4 ]
mov [ ecx ], eax
cmp eax, $10000
setae al
movzx eax, al
lea eax, [ eax * 2 + ecx + 18 ]
// edx = 8 digit value, ecx = pointer
// Emit 8 follow digits
movd xmm1, edx // xmm1 = Value
movdqa xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
pmuludq xmm0, xmm1
psrlq xmm0, 45 // xmm0 = xmm1 div 10000
pmuludq xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
paddd xmm0, xmm1 // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
psllq xmm0, 2
punpcklwd xmm0, xmm0
punpckldq xmm0, xmm0 // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
movdqa xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ] // xmm2 := xmm0 * 10; shift to left one word.
pmullw xmm2, xmm0
psllq xmm2, 16
psubw xmm0, xmm2 // Extract digits
por xmm0, dqword ptr [ IntUToStrConsts + To_ASCIIw ] // Digits to ASCII
shufps xmm0, xmm0, $4E
movdqu [ eax - 16 ], xmm0 // And save 8 digits at once
ret
@Medium:
cmp eax, 100
jb @Small
// eax 2..8 digits, edx = pointer
// Emit 2..8 digits
movd xmm1, eax // xmm1 = Value
movdqa xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
pmuludq xmm0, xmm1
psrlq xmm0, 45 // xmm0 = xmm1 div 10000
pmuludq xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
paddd xmm0, xmm1 // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
psllq xmm0, 2
punpcklwd xmm0, xmm0
punpckldq xmm0, xmm0 // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
movdqa xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ] // xmm2 := xmm0 * 10; shift to left one word.
pmullw xmm2, xmm0
psllq xmm2, 16
psubw xmm0, xmm2 // Extract digits
movdqa xmm1, dqword ptr [ IntUToStrConsts + To_ASCIIw ] // Digits to ASCII
por xmm0, xmm1
shufps xmm0, xmm0, $4E
// Now we have 8 Unicode characters in the xmm0 register in the correct order.
pcmpeqw xmm1, xmm0 // scan for zeroes.
pmovmskb eax, xmm1
packuswb xmm0, xmm0 // convert to bytes
xor eax, $FFFF // change polarity
bsf eax, eax // amount to shift in bytes.
lea ecx, [ eax * 4 ]
movd xmm1, ecx
psrlq xmm0, xmm1 // bytes shifted.
pxor xmm2, xmm2
punpcklbw xmm0, xmm2
neg eax
movdqu dqword ptr [ edx ], xmm0
lea eax, [ edx + 16 + eax ]
ret
@Small:
// eax 1..2 digits, edx = pointer
// Emit one or two digits
mov eax, dword ptr [ DigitsClippedW + eax * 4 ]
mov [ edx ], eax
cmp eax, $10000
setae al
movzx eax, al
lea eax, [ edx + eax * 2 + 2 ]
end;
{$else if defined( CPUX64 )}
asm
cmp ecx, 100000000
jb @Medium
@Large:
mov r8, rdx // r8 = pointer
// Split up low 8 digits from high 1 or 2 digits..
mov eax, ecx
mov r9, 12379400392853802749 // RoundUp( 2^64+26 / 1e8 )
mul rax, r9
shr rdx, 26
mov r10, rdx // r10 = eax div 1e8
mov rax, rdx
mov r9, 100000000
mul rax, r9
sub ecx, eax // ecx = eax mod 1e8
// Emit the first 2 digits
lea r9, [ DigitsClippedW ]
mov eax, dword ptr [ r9 + r10 * 4 ]
mov dword ptr [ r8 ], eax
// advance pointer ( also for the next 8 bytes)
cmp eax, $10000
setae al
movzx rax, al
lea rax, [ rax * 2 + r8 + 2 + 16 ]
// ecx = 8 digit value, r8 = pointer + 8
movd xmm1, ecx // xmm1 = Value
movdqa xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
pmuludq xmm0, xmm1
psrlq xmm0, 45 // xmm0 = xmm1 div 10000
pmuludq xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
paddd xmm0, xmm1 // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
psllq xmm0, 2
punpcklwd xmm0, xmm0
punpckldq xmm0, xmm0 // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
movdqa xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ] // xmm2 := xmm0 * 10; shift to left one word.
pmullw xmm2, xmm0
psllq xmm2, 16
psubw xmm0, xmm2 // Extract digits
por xmm0, dqword ptr [ IntUToStrConsts + To_ASCIIw ] // Digits to ASCII
shufps xmm0, xmm0, $4E
movdqu [ rax - 16 ], xmm0 // And save 8 digits at once
ret
@Medium:
cmp ecx, 100
jb @Small
// eax 2..8 digits, rdx = pointer
// Emit 2..8 digits
movd xmm1, ecx // xmm1 = Value
movdqa xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
pmuludq xmm0, xmm1
psrlq xmm0, 45 // xmm0 = xmm1 div 10000
pmuludq xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
paddd xmm0, xmm1 // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
psllq xmm0, 2
punpcklwd xmm0, xmm0
punpckldq xmm0, xmm0 // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
pmulhuw xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
movdqa xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ] // xmm2 := xmm0 * 10; shift to left one word.
pmullw xmm2, xmm0
psllq xmm2, 16
psubw xmm0, xmm2 // Extract digits
movdqa xmm1, dqword ptr [ IntUToStrConsts + To_ASCIIw ] // Digits to ASCII
por xmm0, xmm1
shufps xmm0, xmm0, $4E
// Now we have 8 Unicode characters in the xmm0 register in the correct order.
pcmpeqw xmm1, xmm0 // scan for zeroes.
pmovmskb eax, xmm1
packuswb xmm0, xmm0 // convert to bytes
xor eax, $FFFF // change polarity
bsf eax, eax // amount to shift in bytes.
lea ecx, [ eax * 4 ]
movd xmm1, ecx
psrlq xmm0, xmm1 // bytes shifted.
pxor xmm2, xmm2
punpcklbw xmm0, xmm2
neg rax
movdqu dqword ptr [ rdx ], xmm0
lea rax, [ rdx + 16 + rax ]
ret
@Small:
// ecx 1..2 digits, rdx = pointer
// Emit one or two digits
lea r9, [ DigitsClippedW ]
mov eax, dword ptr [ r9 + rcx * 4 ]
mov [ rdx ], eax
cmp eax, $10000
setae al
movzx rax, al
lea rax, [ rdx + rax * 2 + 2 ]
end;
{$else}
begin
Assert( False, 'Not implemented.' );
end;
{$ifend}
这篇关于更快的替代InttoStr / StrToInt?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!