Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (170)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
; 7zAsm.asm -- ASM macros
; 2022-05-16 : Igor Pavlov : Public domain
; 2023-12-08 : Igor Pavlov : Public domain
; UASM can require these changes
@@ -43,7 +43,7 @@ else
endif
endif
OPTION PROLOGUE:NONE
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
MY_ASM_START macro
@@ -121,10 +121,29 @@ endif
x2_H equ DH
x3_H equ BH
; r0_L equ AL
; r1_L equ CL
; r2_L equ DL
; r3_L equ BL
; r0_H equ AH
; r1_H equ CH
; r2_H equ DH
; r3_H equ BH
ifdef x64
x5_L equ BPL
x6_L equ SIL
x7_L equ DIL
x8_L equ r8b
x9_L equ r9b
x10_L equ r10b
x11_L equ r11b
x12_L equ r12b
x13_L equ r13b
x14_L equ r14b
x15_L equ r15b
r0 equ RAX
r1 equ RCX
@@ -153,6 +172,22 @@ else
r7 equ x7
endif
x0_R equ r0
x1_R equ r1
x2_R equ r2
x3_R equ r3
x4_R equ r4
x5_R equ r5
x6_R equ r6
x7_R equ r7
x8_R equ r8
x9_R equ r9
x10_R equ r10
x11_R equ r11
x12_R equ r12
x13_R equ r13
x14_R equ r14
x15_R equ r15
ifdef x64
ifdef ABI_LINUX
@@ -200,6 +235,14 @@ REG_ABI_PARAM_0 equ REG_PARAM_0
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
REG_ABI_PARAM_1 equ REG_PARAM_1
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
MY_PUSH_4_REGS
endm
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
MY_POP_4_REGS
endm
else
; x64
@@ -261,12 +304,25 @@ endm
endif ; IS_LINUX
MY_PUSH_PRESERVED_ABI_REGS macro
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
endif
endm
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
if (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
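; Win64 additionally treats RSI and RDI as callee-saved (the SysV/Linux ABI
; does not), which is why the Windows branch above preserves four registers
; and the Linux branch only two.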
MY_PUSH_PRESERVED_ABI_REGS macro
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
push r12
push r13
push r14
@@ -279,11 +335,7 @@ MY_POP_PRESERVED_ABI_REGS macro
pop r14
pop r13
pop r12
if (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
endm
endif ; x64
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2021-02-07 : Igor Pavlov : Public domain
; 2023-12-08 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
rD equ r2
rN equ r7
rT equ r5
NUM_WORDS equ 3
UNROLL_CNT equ 2
ifdef x64
num_VAR equ r8
table_VAR equ r9
else
if (IS_CDECL gt 0)
crc_OFFS equ (REG_SIZE * 5)
data_OFFS equ (REG_SIZE + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
else
size_OFFS equ (REG_SIZE * 5)
endif
table_OFFS equ (REG_SIZE + size_OFFS)
num_VAR equ [r4 + size_OFFS]
table_VAR equ [r4 + table_OFFS]
if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
.err <NUM_WORDS_IS_INCORRECT>
endif
if (UNROLL_CNT lt 1)
.err <UNROLL_CNT_IS_INCORRECT>
endif
SRCDAT equ rD + rN * 1 + 4 *
rD equ r2
rD_x equ x2
rN equ r7
rT equ r5
ifndef x64
if (IS_CDECL gt 0)
crc_OFFS equ (REG_SIZE * 5)
data_OFFS equ (REG_SIZE + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
else
size_OFFS equ (REG_SIZE * 5)
endif
table_OFFS equ (REG_SIZE + size_OFFS)
endif
; rN + rD is the same speed as rD, but it saves one instruction in the loop
SRCDAT_1 equ rN + rD * 1 + 1 *
SRCDAT_4 equ rN + rD * 1 + 4 *
CRC macro op:req, dest:req, src:req, t:req
op dest, DWORD PTR [rT + src * 4 + 0400h * t]
op dest, dword ptr [rT + @CatStr(src, _R) * 4 + 0400h * (t)]
endm
CRC_XOR macro dest:req, src:req, t:req
CRC xor, dest, src, t
CRC xor, dest, src, t
endm
CRC_MOV macro dest:req, src:req, t:req
CRC mov, dest, src, t
CRC mov, dest, src, t
endm
MOVZXLO macro dest:req, src:req
movzx dest, @CatStr(src, _L)
endm
MOVZXHI macro dest:req, src:req
movzx dest, @CatStr(src, _H)
endm
; movzx x0, x0_L is slow on some CPUs (ivb) if src and dest are the same register
; movzx x3, x0_L sometimes is 0 cycles latency (not always)
; movzx x3, x0_L sometimes is 0.5 cycles latency
; movzx x3, x0_H is 2 cycles latency on some CPUs
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shr x0, 8
CRC xor, x0, r6, 0
dec rN
movzx x6, byte ptr [rD]
MOVZXLO x3, x0
inc rD
shr x0, 8
xor x6, x3
CRC_XOR x0, x6, 0
dec rN
endm
LOAD_1 macro dest:req, t:req, iter:req, index:req
movzx dest, byte ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
endm
LOAD_2 macro dest:req, t:req, iter:req, index:req
movzx dest, word ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
endm
CRC_QUAD macro nn, t:req, iter:req
ifdef x64
; paired memory loads give 1-3% speed gain, but they use more registers
LOAD_2 x3, t, iter, 0
LOAD_2 x9, t, iter, 2
MOVZXLO x6, x3
shr x3, 8
CRC_XOR nn, x6, t * 4 + 3
MOVZXLO x6, x9
shr x9, 8
CRC_XOR nn, x3, t * 4 + 2
CRC_XOR nn, x6, t * 4 + 1
CRC_XOR nn, x9, t * 4 + 0
elseif 0
LOAD_2 x3, t, iter, 0
MOVZXLO x6, x3
shr x3, 8
CRC_XOR nn, x6, t * 4 + 3
CRC_XOR nn, x3, t * 4 + 2
LOAD_2 x3, t, iter, 2
MOVZXLO x6, x3
shr x3, 8
CRC_XOR nn, x6, t * 4 + 1
CRC_XOR nn, x3, t * 4 + 0
elseif 0
LOAD_1 x3, t, iter, 0
LOAD_1 x6, t, iter, 1
CRC_XOR nn, x3, t * 4 + 3
CRC_XOR nn, x6, t * 4 + 2
LOAD_1 x3, t, iter, 2
LOAD_1 x6, t, iter, 3
CRC_XOR nn, x3, t * 4 + 1
CRC_XOR nn, x6, t * 4 + 0
else
; 32-bit load is better if there is only one read port (core2)
; but that code can be slower if there are 2 read ports (snb)
mov x3, dword ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + 0)]
MOVZXLO x6, x3
CRC_XOR nn, x6, t * 4 + 3
MOVZXHI x6, x3
shr x3, 16
CRC_XOR nn, x6, t * 4 + 2
MOVZXLO x6, x3
shr x3, 8
CRC_XOR nn, x6, t * 4 + 1
CRC_XOR nn, x3, t * 4 + 0
endif
endm
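; In C terms, one 4-byte step of this slicing scheme is (sketch, names
; illustrative; T[t*256 + b] is the table at byte offset 0400h*t used by CRC):
;   v ^= load_le32(p);  p += 4;
;   v = T[3*256 + ( v        & 0xFF)]
;     ^ T[2*256 + ((v >>  8) & 0xFF)]
;     ^ T[1*256 + ((v >> 16) & 0xFF)]
;     ^ T[0*256 + ( v >> 24        )];
; CRC_QUAD does the same for one additional word, with table offsets shifted by t*4.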
MY_PROLOG macro crc_end:req
LAST equ (4 * (NUM_WORDS - 1))
CRC_ITER macro qq, nn, iter
mov nn, [SRCDAT_4 (NUM_WORDS * (1 + iter))]
i = 0
rept NUM_WORDS - 1
CRC_QUAD nn, i, iter
i = i + 1
endm
MOVZXLO x6, qq
mov x3, qq
shr x3, 24
CRC_XOR nn, x6, LAST + 3
CRC_XOR nn, x3, LAST + 0
ror qq, 16
MOVZXLO x6, qq
shr qq, 24
CRC_XOR nn, x6, LAST + 1
if ((UNROLL_CNT and 1) eq 1) and (iter eq (UNROLL_CNT - 1))
CRC_MOV qq, qq, LAST + 2
xor qq, nn
else
CRC_XOR nn, qq, LAST + 2
endif
endm
; + 4 for prefetching next 4-bytes after current iteration
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 * UNROLL_CNT + 4)
ALIGN_MASK equ 3
; MY_PROC @CatStr(CrcUpdateT, 12), 4
MY_PROC @CatStr(CrcUpdateT, %(NUM_WORDS * 4)), 4
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
ifdef x64
mov x0, REG_ABI_PARAM_0_x ; x0 = x1(win) / x7(linux)
mov rT, REG_ABI_PARAM_3 ; r5 = r9(win) / x1(linux)
mov rN, REG_ABI_PARAM_2 ; r7 = r8(win) / r2(linux)
; mov rD, REG_ABI_PARAM_1 ; r2 = r2(win)
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x7
mov rT, REG_ABI_PARAM_3 ; r5 = r1
mov rN, REG_ABI_PARAM_2 ; r7 = r2
mov rD, REG_ABI_PARAM_1 ; r2 = r6
else
MY_PUSH_4_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x1
mov rT, REG_ABI_PARAM_3 ; r5 = r9
mov rN, REG_ABI_PARAM_2 ; r7 = r8
; mov rD, REG_ABI_PARAM_1 ; r2 = r2
endif
else
MY_PUSH_4_REGS
if (IS_CDECL gt 0)
mov x0, [r4 + crc_OFFS]
mov rD, [r4 + data_OFFS]
else
mov x0, REG_ABI_PARAM_0_x
endif
mov rN, num_VAR
mov rT, table_VAR
mov rN, [r4 + size_OFFS]
mov rT, [r4 + table_OFFS]
endif
test rN, rN
jz crc_end
@@:
test rD, 7
jz @F
CRC1b
jnz @B
@@:
cmp rN, 16
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 8
and rN, NOT 7
sub rD, rN
xor x0, [SRCDAT 0]
endm
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
jb crc_end
@@:
test rD_x, ALIGN_MASK ; test rD, ALIGN_MASK
jz @F
CRC1b
jmp @B
@@:
xor x0, dword ptr [rD]
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
sub rD, rN
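; The lea/sub pair turns rD into a negative offset from rN:
;   rN = data + size - (NUM_BYTES_LIMIT - 1)   ; last safe read position
;   rD = data - rN                             ; negative index
; so the main loop addresses [rN + rD] and repeats with "add rD, step / jnc",
; looping while rD stays negative (no carry out) instead of using a compare.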
MY_EPILOG macro crc_end:req
xor x0, [SRCDAT 0]
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
if (IS_X64 gt 0) and (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
align 16
@@:
unr_index = 0
while unr_index lt UNROLL_CNT
if (unr_index and 1) eq 0
CRC_ITER x0, x1, unr_index
else
CRC_ITER x1, x0, unr_index
endif
unr_index = unr_index + 1
endm
MY_PROC CrcUpdateT8, 4
MY_PROLOG crc_end_8
mov x1, [SRCDAT 1]
align 16
main_loop_8:
mov x6, [SRCDAT 2]
movzx x3, x1_L
CRC_XOR x6, r3, 3
movzx x3, x1_H
CRC_XOR x6, r3, 2
shr x1, 16
movzx x3, x1_L
movzx x1, x1_H
CRC_XOR x6, r3, 1
movzx x3, x0_L
CRC_XOR x6, r1, 0
mov x1, [SRCDAT 3]
CRC_XOR x6, r3, 7
movzx x3, x0_H
shr x0, 16
CRC_XOR x6, r3, 6
movzx x3, x0_L
CRC_XOR x6, r3, 5
movzx x3, x0_H
CRC_MOV x0, r3, 4
xor x0, x6
add rD, 8
jnz main_loop_8
MY_EPILOG crc_end_8
MY_ENDP
add rD, NUM_WORDS * 4 * UNROLL_CNT
jnc @B
if 0
; byte version
add rD, rN
xor x0, dword ptr [rD]
add rN, NUM_BYTES_LIMIT - 1
else
; 4-byte version
add rN, 4 * NUM_WORDS * UNROLL_CNT
sub rD, 4 * NUM_WORDS * UNROLL_CNT
@@:
MOVZXLO x3, x0
MOVZXHI x1, x0
shr x0, 16
MOVZXLO x6, x0
shr x0, 8
CRC_MOV x0, x0, 0
CRC_XOR x0, x3, 3
CRC_XOR x0, x1, 2
CRC_XOR x0, x6, 1
add rD, 4
if (NUM_WORDS * UNROLL_CNT) ne 1
jc @F
xor x0, [SRCDAT_4 0]
jmp @B
@@:
endif
add rD, rN
add rN, 4 - 1
endif
sub rN, rD
crc_end:
test rN, rN
jz func_end
@@:
CRC1b
jnz @B
MY_PROC CrcUpdateT4, 4
MY_PROLOG crc_end_4
align 16
main_loop_4:
movzx x1, x0_L
movzx x3, x0_H
shr x0, 16
movzx x6, x0_H
and x0, 0FFh
CRC_MOV x1, r1, 3
xor x1, [SRCDAT 1]
CRC_XOR x1, r3, 2
CRC_XOR x1, r6, 0
CRC_XOR x1, r0, 1
movzx x0, x1_L
movzx x3, x1_H
shr x1, 16
movzx x6, x1_H
and x1, 0FFh
CRC_MOV x0, r0, 3
xor x0, [SRCDAT 2]
CRC_XOR x0, r3, 2
CRC_XOR x0, r6, 0
CRC_XOR x0, r1, 1
add rD, 8
jnz main_loop_4
MY_EPILOG crc_end_4
func_end:
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
MY_ENDP
end
File mode changed from 100755 to 100644
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
; 2021-07-21: Igor Pavlov : Public domain
; 2024-06-18: Igor Pavlov : Public domain
;
ifndef x64
@@ -11,10 +11,31 @@ include 7zAsm.asm
MY_ASM_START
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
if (IS_LINUX gt 0)
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
else
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
endif
endif
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
; align 16
endm
else
MY_ALIGN macro num:req
; We expect that ".text" is aligned to 16 bytes.
; So we don't need large alignment inside our function.
align 16
endm
endif
MY_ALIGN_16 macro
MY_ALIGN 16
endm
MY_ALIGN_32 macro
@@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
endm
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
MY_PROC GetMatchesSpecN_2, 13
MY_PUSH_PRESERVED_ABI_REGS
mov r0, RSP
@@ -508,6 +533,8 @@ fin:
MY_POP_PRESERVED_ABI_REGS
MY_ENDP
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
_TEXT$LZFINDOPT ENDS
endif
end
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2021-02-23: Igor Pavlov : Public domain
; 2024-06-18: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
MY_ASM_START
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment.
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
; The performance is almost identical in our tests.
; But the performance can depend on the position of the lzmadec code in the
; instruction cache or micro-op cache (that is, on the low address bits within
; 32-byte/64-byte cache lines).
; And 64-byte alignment provides a more consistent speed regardless
; of the code's position in the executable.
; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
; slightly faster than 64-byte aligned code in some cases, if the offset of the
; lzmadec code within a 64-byte block after compilation happens to give better speed.
; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
if (IS_LINUX gt 0)
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
else
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
endif
endif
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
; align 16
endm
else
MY_ALIGN macro num:req
; We expect that ".text" is aligned to 16 bytes.
; So we don't need large alignment inside our function.
align 16
endm
endif
MY_ALIGN_16 macro
MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
PARAM_limit equ REG_ABI_PARAM_1
PARAM_bufLimit equ REG_ABI_PARAM_2
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_ABI_REGS
@@ -1298,6 +1332,8 @@ fin:
MY_POP_PRESERVED_ABI_REGS
MY_ENDP
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
_TEXT$LZMADECOPT ENDS
endif
end
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
; 2024-06-16 : Igor Pavlov : Public domain
include 7zAsm.asm
@@ -20,7 +20,7 @@ MY_ASM_START
CONST SEGMENT
CONST SEGMENT READONLY
align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
......
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2022-04-17 : Igor Pavlov : Public domain
; 2024-06-16 : Igor Pavlov : Public domain
include 7zAsm.asm
@@ -20,7 +20,7 @@ endif
EXTRN K_CONST:xmmword
CONST SEGMENT
CONST SEGMENT READONLY
align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
......
; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2021-02-06 : Igor Pavlov : Public domain
; 2023-12-08 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
NUM_WORDS equ 3
if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
.err <num_words_IS_INCORRECT>
endif
NUM_SKIP_BYTES equ ((NUM_WORDS - 2) * 4)
MOVZXLO macro dest:req, src:req
movzx dest, @CatStr(src, _L)
endm
MOVZXHI macro dest:req, src:req
movzx dest, @CatStr(src, _H)
endm
ifdef x64
rD equ r9
rD equ r11
rN equ r10
rT equ r5
num_VAR equ r8
SRCDAT4 equ dword ptr [rD + rN * 1]
rT equ r9
CRC_OP macro op:req, dest:req, src:req, t:req
op dest, QWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t)]
endm
CRC_XOR macro dest:req, src:req, t:req
xor dest, QWORD PTR [rT + src * 8 + 0800h * t]
CRC_OP xor, dest, src, t
endm
CRC_MOV macro dest:req, src:req, t:req
CRC_OP mov, dest, src, t
endm
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shr r0, 8
CRC_XOR r0, r6, 0
dec rN
endm
MY_PROLOG macro crc_end:req
ifdef ABI_LINUX
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
endif
mov r0, REG_ABI_PARAM_0
mov rN, REG_ABI_PARAM_2
mov rT, REG_ABI_PARAM_3
mov rD, REG_ABI_PARAM_1
test rN, rN
jz crc_end
@@:
test rD, 3
jz @F
CRC1b
jnz @B
@@:
cmp rN, 8
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 4
and rN, NOT 3
sub rD, rN
mov x1, SRCDAT4
xor r0, r1
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
mov x1, SRCDAT4
xor r0, r1
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
ifdef ABI_LINUX
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
movzx x6, BYTE PTR [rD]
inc rD
MOVZXLO x3, x0
xor x6, x3
shr r0, 8
CRC_XOR r0, x6, 0
dec rN
endm
MY_PROC XzCrc64UpdateT4, 4
MY_PROLOG crc_end_4
align 16
main_loop_4:
mov x1, SRCDAT4
movzx x2, x0_L
movzx x3, x0_H
shr r0, 16
movzx x6, x0_L
movzx x7, x0_H
shr r0, 16
CRC_XOR r1, r2, 3
CRC_XOR r0, r3, 2
CRC_XOR r1, r6, 1
CRC_XOR r0, r7, 0
xor r0, r1
add rD, 4
jnz main_loop_4
MY_EPILOG crc_end_4
; ALIGN_MASK is 3 or 7 bytes alignment:
ALIGN_MASK equ (7 - (NUM_WORDS and 1) * 4)
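; NUM_WORDS odd  -> ALIGN_MASK = 3 (align data to 4 bytes)
; NUM_WORDS even -> ALIGN_MASK = 7 (align data to 8 bytes)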
if NUM_WORDS eq 1
src_rN_offset equ 4
; + 4 for prefetching next 4-bytes after current iteration
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 4)
SRCDAT4 equ DWORD PTR [rN + rD * 1]
XOR_NEXT macro
mov x1, [rD]
xor r0, r1
endm
else ; NUM_WORDS > 1
src_rN_offset equ 8
; + 8 for prefetching next 8-bytes after current iteration
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 8)
XOR_NEXT macro
xor r0, QWORD PTR [rD] ; 64-bit read, can be unaligned
endm
; 32-bit or 64-bit
LOAD_SRC_MULT4 macro dest:req, word_index:req
mov dest, [rN + rD * 1 + 4 * (word_index) - src_rN_offset];
endm
endif
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 4
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
mov r0, REG_ABI_PARAM_0 ; r0 <- r1 / r7
mov rD, REG_ABI_PARAM_1 ; r11 <- r2 / r6
mov rN, REG_ABI_PARAM_2 ; r10 <- r8 / r2
if (IS_LINUX gt 0)
mov rT, REG_ABI_PARAM_3 ; r9 <- r9 / r1
endif
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
jb crc_end
@@:
test rD, ALIGN_MASK
jz @F
CRC1b
jmp @B
@@:
XOR_NEXT
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
sub rD, rN
add rN, src_rN_offset
align 16
@@:
if NUM_WORDS eq 1
mov x1, x0
shr x1, 8
MOVZXLO x3, x1
MOVZXLO x2, x0
shr x1, 8
shr r0, 32
xor x0, SRCDAT4
CRC_XOR r0, x2, 3
CRC_XOR r0, x3, 2
MOVZXLO x2, x1
shr x1, 8
CRC_XOR r0, x2, 1
CRC_XOR r0, x1, 0
else ; NUM_WORDS > 1
if NUM_WORDS ne 2
k = 2
while k lt NUM_WORDS
LOAD_SRC_MULT4 x1, k
crc_op1 textequ <xor>
if k eq 2
if (NUM_WORDS and 1)
LOAD_SRC_MULT4 x7, NUM_WORDS ; aligned 32-bit
LOAD_SRC_MULT4 x6, NUM_WORDS + 1 ; aligned 32-bit
shl r6, 32
else
LOAD_SRC_MULT4 r6, NUM_WORDS ; aligned 64-bit
crc_op1 textequ <mov>
endif
endif
table = 4 * (NUM_WORDS - 1 - k)
MOVZXLO x3, x1
CRC_OP crc_op1, r7, x3, 3 + table
MOVZXHI x3, x1
shr x1, 16
CRC_XOR r6, x3, 2 + table
MOVZXLO x3, x1
shr x1, 8
CRC_XOR r7, x3, 1 + table
CRC_XOR r6, x1, 0 + table
k = k + 1
endm
crc_op2 textequ <xor>
else ; NUM_WORDS == 2
LOAD_SRC_MULT4 r6, NUM_WORDS ; aligned 64-bit
crc_op2 textequ <mov>
endif ; NUM_WORDS == 2
MOVZXHI x3, x0
MOVZXLO x2, x0
mov r1, r0
shr r1, 32
shr x0, 16
CRC_XOR r6, x2, NUM_SKIP_BYTES + 7
CRC_OP crc_op2, r7, x3, NUM_SKIP_BYTES + 6
MOVZXLO x2, x0
MOVZXHI x5, x1
MOVZXLO x3, x1
shr x0, 8
shr x1, 16
CRC_XOR r7, x2, NUM_SKIP_BYTES + 5
CRC_XOR r6, x3, NUM_SKIP_BYTES + 3
CRC_XOR r7, x0, NUM_SKIP_BYTES + 4
CRC_XOR r6, x5, NUM_SKIP_BYTES + 2
MOVZXLO x2, x1
shr x1, 8
CRC_XOR r7, x2, NUM_SKIP_BYTES + 1
CRC_MOV r0, x1, NUM_SKIP_BYTES + 0
xor r0, r6
xor r0, r7
endif ; NUM_WORDS > 1
add rD, NUM_WORDS * 4
jnc @B
sub rN, src_rN_offset
add rD, rN
XOR_NEXT
add rN, NUM_BYTES_LIMIT - 1
sub rN, rD
crc_end:
test rN, rN
jz func_end
@@:
CRC1b
jnz @B
func_end:
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
MY_ENDP
else
; ==================================================================
; x86 (32-bit)
rD equ r1
rN equ r7
rD equ r7
rN equ r1
rT equ r5
xA equ x6
xA_R equ r6
ifdef x64
num_VAR equ r8
else
crc_OFFS equ (REG_SIZE * 5)
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
@@ -133,107 +251,273 @@ else
table_VAR equ [r4 + table_OFFS]
num_VAR equ table_VAR
endif
endif ; x64
SRCDAT4 equ DWORD PTR [rN + rD * 1]
SRCDAT4 equ dword ptr [rD + rN * 1]
CRC_1 macro op:req, dest:req, src:req, t:req, word_index:req
op dest, DWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t) + (word_index) * 4]
endm
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]
op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
CRC_1 op0, dest0, src, t, 0
CRC_1 op1, dest1, src, t, 1
endm
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
CRC xor, xor, dest0, dest1, src, t
CRC xor, xor, dest0, dest1, src, t
endm
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shrd r0, r2, 8
shr r2, 8
CRC_XOR r0, r2, r6, 0
dec rN
endm
MY_PROLOG macro crc_end:req
MY_PUSH_4_REGS
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
mov rN, [r4 + size_OFFS]
mov rD, [r4 + data_OFFS]
movzx xA, BYTE PTR [rD]
inc rD
MOVZXLO x3, x0
xor xA, x3
shrd x0, x2, 8
shr x2, 8
CRC_XOR x0, x2, xA, 0
dec rN
endm
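; shrd/shr above perform one 64-bit right shift on a CRC state split across
; two 32-bit registers; in C terms (x0 = low half, x2 = high half):
;   x0 = (x0 >> 8) | (x2 << 24);   // shrd x0, x2, 8
;   x2 =  x2 >> 8;                 // shr  x2, 8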
MY_PROLOG_BASE macro
MY_PUSH_4_REGS
ifdef x64
mov r0, REG_ABI_PARAM_0 ; r0 <- r1 / r7
mov rT, REG_ABI_PARAM_3 ; r5 <- r9 / r1
mov rN, REG_ABI_PARAM_2 ; r1 <- r8 / r2
mov rD, REG_ABI_PARAM_1 ; r7 <- r2 / r6
mov r2, r0
shr r2, 32
mov x0, x0
else
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
mov rN, [r4 + size_OFFS]
mov rD, [r4 + data_OFFS]
else
mov rD, REG_ABI_PARAM_0 ; r7 <- r1 : (data)
mov rN, REG_ABI_PARAM_1 ; r1 <- r2 : (size)
endif
mov x0, [r4 + crc_OFFS]
mov x2, [r4 + crc_OFFS + 4]
mov rT, table_VAR
endif
endm
MY_EPILOG_BASE macro crc_end:req, func_end:req
crc_end:
test rN, rN
jz func_end
@@:
CRC1b
jnz @B
func_end:
ifdef x64
shl r2, 32
xor r0, r2
endif
MY_POP_4_REGS
endm
; ALIGN_MASK is 3 or 7 bytes alignment:
ALIGN_MASK equ (7 - (NUM_WORDS and 1) * 4)
if (NUM_WORDS eq 1)
NUM_BYTES_LIMIT_T4 equ (NUM_WORDS * 4 + 4)
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
MY_PROLOG_BASE
cmp rN, NUM_BYTES_LIMIT_T4 + ALIGN_MASK
jb crc_end_4
@@:
test rD, ALIGN_MASK
jz @F
CRC1b
jmp @B
@@:
xor x0, [rD]
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT_T4 - 1)]
sub rD, rN
add rN, 4
MOVZXLO xA, x0
align 16
@@:
mov x3, SRCDAT4
xor x3, x2
shr x0, 8
CRC xor, mov, x3, x2, xA, 3
MOVZXLO xA, x0
shr x0, 8
; MOVZXHI xA, x0
; shr x0, 16
CRC_XOR x3, x2, xA, 2
MOVZXLO xA, x0
shr x0, 8
CRC_XOR x3, x2, xA, 1
CRC_XOR x3, x2, x0, 0
MOVZXLO xA, x3
mov x0, x3
add rD, 4
jnc @B
sub rN, 4
add rD, rN
xor x0, [rD]
add rN, NUM_BYTES_LIMIT_T4 - 1
sub rN, rD
MY_EPILOG_BASE crc_end_4, func_end_4
MY_ENDP
else ; NUM_WORDS > 1
SHR_X macro x, imm
shr x, imm
endm
ITER_1 macro v0, v1, a, off
MOVZXLO xA, a
SHR_X a, 8
CRC_XOR v0, v1, xA, off
endm
ITER_4 macro v0, v1, a, off
if 0 eq 0
ITER_1 v0, v1, a, off + 3
ITER_1 v0, v1, a, off + 2
ITER_1 v0, v1, a, off + 1
CRC_XOR v0, v1, a, off
elseif 0 eq 0
MOVZXLO xA, a
CRC_XOR v0, v1, xA, off + 3
mov xA, a
ror a, 16 ; 32-bit ror
shr xA, 24
CRC_XOR v0, v1, xA, off
MOVZXLO xA, a
SHR_X a, 24
CRC_XOR v0, v1, xA, off + 1
CRC_XOR v0, v1, a, off + 2
else
; MOVZXHI provides smaller code, but MOVZX_HI_BYTE is not fast instruction
MOVZXLO xA, a
CRC_XOR v0, v1, xA, off + 3
MOVZXHI xA, a
SHR_X a, 16
CRC_XOR v0, v1, xA, off + 2
MOVZXLO xA, a
SHR_X a, 8
CRC_XOR v0, v1, xA, off + 1
CRC_XOR v0, v1, a, off
endif
endm
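; byte-to-table mapping in all three ITER_4 variants, for a = b0|b1<<8|b2<<16|b3<<24:
;   b0 -> off+3, b1 -> off+2, b2 -> off+1, b3 -> off
; the ror variant just reaches b1/b2 via "ror a, 16" instead of MOVZXHI.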
ITER_1_PAIR macro v0, v1, a0, a1, off
ITER_1 v0, v1, a0, off + 4
ITER_1 v0, v1, a1, off
endm
src_rD_offset equ 8
STEP_SIZE equ (NUM_WORDS * 4)
ITER_12_NEXT macro op, index, v0, v1
op v0, DWORD PTR [rD + (index + 1) * STEP_SIZE - src_rD_offset]
op v1, DWORD PTR [rD + (index + 1) * STEP_SIZE + 4 - src_rD_offset]
endm
ITER_12 macro index, a0, a1, v0, v1
if NUM_SKIP_BYTES eq 0
ITER_12_NEXT mov, index, v0, v1
else
mov rN, r2
k = 0
while k lt NUM_SKIP_BYTES
movzx xA, BYTE PTR [rD + (index) * STEP_SIZE + k + 8 - src_rD_offset]
if k eq 0
CRC mov, mov, v0, v1, xA, NUM_SKIP_BYTES - 1 - k
else
CRC_XOR v0, v1, xA, NUM_SKIP_BYTES - 1 - k
endif
k = k + 1
endm
ITER_12_NEXT xor, index, v0, v1
endif
mov x0, [r4 + crc_OFFS]
mov x2, [r4 + crc_OFFS + 4]
mov rT, table_VAR
test rN, rN
jz crc_end
@@:
test rD, 3
jz @F
CRC1b
jnz @B
@@:
cmp rN, 8
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 4
and rN, NOT 3
sub rD, rN
xor r0, SRCDAT4
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
xor r0, SRCDAT4
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
MY_POP_4_REGS
endm
MY_PROC XzCrc64UpdateT4, 5
MY_PROLOG crc_end_4
movzx x6, x0_L
align 16
main_loop_4:
mov r3, SRCDAT4
xor r3, r2
CRC xor, mov, r3, r2, r6, 3
movzx x6, x0_H
shr r0, 16
CRC_XOR r3, r2, r6, 2
movzx x6, x0_L
movzx x0, x0_H
CRC_XOR r3, r2, r6, 1
CRC_XOR r3, r2, r0, 0
movzx x6, x3_L
mov r0, r3
add rD, 4
jnz main_loop_4
MY_EPILOG crc_end_4
if 0 eq 0
ITER_4 v0, v1, a0, NUM_SKIP_BYTES + 4
ITER_4 v0, v1, a1, NUM_SKIP_BYTES
else ; the interleaved version is faster on some processors, slower on others
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 3
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 2
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 1
CRC_XOR v0, v1, a0, NUM_SKIP_BYTES + 4
CRC_XOR v0, v1, a1, NUM_SKIP_BYTES
endif
endm
; we use (UNROLL_CNT > 1) to reduce read port pressure (num_VAR reads)
UNROLL_CNT equ (2 * 1)
NUM_BYTES_LIMIT equ (STEP_SIZE * UNROLL_CNT + 8)
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
MY_PROLOG_BASE
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
jb crc_end_12
@@:
test rD, ALIGN_MASK
jz @F
CRC1b
jmp @B
@@:
xor x0, [rD]
xor x2, [rD + 4]
add rD, src_rD_offset
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
mov num_VAR, rN
align 16
@@:
i = 0
rept UNROLL_CNT
if (i and 1) eq 0
ITER_12 i, x0, x2, x1, x3
else
ITER_12 i, x1, x3, x0, x2
endif
i = i + 1
endm
if (UNROLL_CNT and 1)
mov x0, x1
mov x2, x3
endif
add rD, STEP_SIZE * UNROLL_CNT
cmp rD, num_VAR
jb @B
mov rN, num_VAR
add rN, NUM_BYTES_LIMIT - 1
sub rN, rD
sub rD, src_rD_offset
xor x0, [rD]
xor x2, [rD + 4]
MY_EPILOG_BASE crc_end_12, func_end_12
MY_ENDP
endif ; (NUM_WORDS > 1)
endif ; ! x64
end
/* 7z.h -- 7z interface
2018-07-02 : Igor Pavlov : Public domain */
2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
#ifndef ZIP7_INC_7Z_H
#define ZIP7_INC_7Z_H
#include "7zTypes.h"
@@ -98,7 +98,7 @@ typedef struct
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *stream, UInt64 startPos,
ILookInStreamPtr stream, UInt64 startPos,
Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain);
@@ -174,7 +174,7 @@ UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16
SRes SzArEx_Extract(
const CSzArEx *db,
ILookInStream *inStream,
ILookInStreamPtr inStream,
UInt32 fileIndex, /* index of file */
UInt32 *blockIndex, /* index of solid block */
Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */
@@ -196,7 +196,7 @@ SZ_ERROR_INPUT_EOF
SZ_ERROR_FAIL
*/
SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream,
ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
EXTERN_C_END
......
/* 7zAlloc.c -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
/* 7zAlloc.c -- Allocation functions for 7z processing
2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,74 +7,83 @@
#include "7zAlloc.h"
/* #define _SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
/* #define SZ_ALLOC_DEBUG */
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
#ifdef SZ_ALLOC_DEBUG
/*
#ifdef _WIN32
#include <windows.h>
#include "7zWindows.h"
#endif
*/
#include <stdio.h>
int g_allocCount = 0;
int g_allocCountTemp = 0;
static int g_allocCount = 0;
static int g_allocCountTemp = 0;
static void Print_Alloc(const char *s, size_t size, int *counter)
{
const unsigned size2 = (unsigned)size;
fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2);
(*counter)++;
}
static void Print_Free(const char *s, int *counter)
{
(*counter)--;
fprintf(stderr, "\n%s count = %10d", s, *counter);
}
#endif
void *SzAlloc(ISzAllocPtr p, size_t size)
{
UNUSED_VAR(p);
UNUSED_VAR(p)
if (size == 0)
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount);
g_allocCount++;
#ifdef SZ_ALLOC_DEBUG
Print_Alloc("Alloc", size, &g_allocCount);
#endif
return malloc(size);
}
void SzFree(ISzAllocPtr p, void *address)
{
UNUSED_VAR(p);
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
{
g_allocCount--;
fprintf(stderr, "\nFree; count = %10d", g_allocCount);
}
UNUSED_VAR(p)
#ifdef SZ_ALLOC_DEBUG
if (address)
Print_Free("Free ", &g_allocCount);
#endif
free(address);
}
void *SzAllocTemp(ISzAllocPtr p, size_t size)
{
UNUSED_VAR(p);
UNUSED_VAR(p)
if (size == 0)
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp);
g_allocCountTemp++;
#ifdef SZ_ALLOC_DEBUG
Print_Alloc("Alloc_temp", size, &g_allocCountTemp);
/*
#ifdef _WIN32
return HeapAlloc(GetProcessHeap(), 0, size);
#endif
*/
#endif
return malloc(size);
}
void SzFreeTemp(ISzAllocPtr p, void *address)
{
UNUSED_VAR(p);
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
{
g_allocCountTemp--;
fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
}
UNUSED_VAR(p)
#ifdef SZ_ALLOC_DEBUG
if (address)
Print_Free("Free_temp ", &g_allocCountTemp);
/*
#ifdef _WIN32
HeapFree(GetProcessHeap(), 0, address);
return;
#endif
*/
#endif
free(address);
}
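A minimal usage sketch for the allocators above, assuming the ISzAlloc_Alloc /
ISzAlloc_Free helper macros and the ISzAlloc struct from 7zTypes.h; alloc_demo
is a hypothetical example, not part of the source:

#include "7zAlloc.h"

static void alloc_demo(void)
{
  const ISzAlloc allocMain = { SzAlloc, SzFree };
  const ISzAlloc allocTemp = { SzAllocTemp, SzFreeTemp };
  void *p = ISzAlloc_Alloc(&allocMain, 1024);  /* returns NULL for size 0 */
  void *t = ISzAlloc_Alloc(&allocTemp, 256);
  ISzAlloc_Free(&allocTemp, t);   /* SzFree tolerates NULL */
  ISzAlloc_Free(&allocMain, p);
}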
/* 7zAlloc.h -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
2023-03-04 : Igor Pavlov : Public domain */
#ifndef __7Z_ALLOC_H
#define __7Z_ALLOC_H
#ifndef ZIP7_INC_7Z_ALLOC_H
#define ZIP7_INC_7Z_ALLOC_H
#include "7zTypes.h"
......
This diff is collapsed.
File mode changed from 100755 to 100644
/* 7zBuf.h -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
2023-03-04 : Igor Pavlov : Public domain */
#ifndef __7Z_BUF_H
#define __7Z_BUF_H
#ifndef ZIP7_INC_7Z_BUF_H
#define ZIP7_INC_7Z_BUF_H
#include "7zTypes.h"
......
File mode changed from 100755 to 100644
/* 7zCrc.c -- CRC32 init
2021-04-01 : Igor Pavlov : Public domain */
/* 7zCrc.c -- CRC32 calculation and init
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zCrc.h"
#include "CpuArch.h"
#define kCrcPoly 0xEDB88320
// for debug:
// #define __ARM_FEATURE_CRC32 1
#ifdef MY_CPU_LE
#define CRC_NUM_TABLES 8
#else
#define CRC_NUM_TABLES 9
#ifdef __ARM_FEATURE_CRC32
// #pragma message("__ARM_FEATURE_CRC32")
#define Z7_CRC_HW_FORCE
#endif
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
// #define Z7_CRC_DEBUG_BE
#ifdef Z7_CRC_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#ifdef Z7_CRC_HW_FORCE
#define Z7_CRC_NUM_TABLES_USE 1
#else
#ifdef Z7_CRC_NUM_TABLES
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
#else
#define Z7_CRC_NUM_TABLES_USE 12
#endif
#endif
#ifndef MY_CPU_BE
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
extern
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4;
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
{
return g_CrcUpdate(v, data, size, g_CrcTable);
}
#if Z7_CRC_NUM_TABLES_USE < 1
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
{
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
}
#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1)
#define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE
#else
#define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1)
#endif
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#ifndef Z7_CRC_HW_FORCE
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
#if Z7_CRC_NUM_TABLES_USE == 1 \
|| (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1
static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size)
{
const UInt32 *table = g_CrcTable;
const Byte *p = (const Byte *)data;
const Byte *pEnd = p + size;
for (; p != pEnd; p++)
const Byte *lim = p + size;
for (; p != lim; p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
#endif
#if Z7_CRC_NUM_TABLES_USE != 1
#ifndef MY_CPU_BE
#define FUNC_NAME_LE_2(s) CrcUpdateT ## s
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#ifndef MY_CPU_LE
#define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#endif
#endif // Z7_CRC_HW_FORCE
/* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*")
#if defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#define USE_ARM64_CRC
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if (defined(__clang__) && (__clang_major__ >= 3)) \
|| defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#if !defined(__ARM_FEATURE_CRC32)
// #pragma message("!defined(__ARM_FEATURE_CRC32)")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define __ARM_FEATURE_CRC32 1
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRC32_WAS_SET
#if defined(__clang__)
#if defined(MY_CPU_ARM64)
#define ATTRIB_CRC __attribute__((__target__("crc")))
#else
#define ATTRIB_CRC __attribute__((__target__("armv8-a,crc")))
#endif
#else
#if defined(MY_CPU_ARM64)
#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000)
#define ATTRIB_CRC __attribute__((__target__("+crc")))
#endif
#else
#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8)
#if defined(__ARM_FP) && __GNUC__ >= 8
// for -mfloat-abi=hard: similar to <arm_acle.h>
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd")))
#else
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#endif
#endif
#endif
#if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC
// #pragma message("<arm_acle.h>")
/*
arm_acle.h (GCC):
before Nov 17, 2017:
#ifdef __ARM_FEATURE_CRC32
Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked
#if __ARM_ARCH >= 8
#pragma GCC target ("arch=armv8-a+crc")
Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1:
#ifdef __ARM_FEATURE_CRC32
#ifdef __ARM_FP
#pragma GCC target ("arch=armv8-a+crc+simd")
#else
#pragma GCC target ("arch=armv8-a+crc")
#endif
*/
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
#define Z7_CRC_HW_USE
#include <arm_acle.h>
#endif
#elif defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#ifdef __clang__
// #define Z7_CRC_HW_USE
// #include <arm_acle.h>
#else
#define Z7_CRC_HW_USE
#include <intrin.h>
#endif
#endif
#endif
#endif
#else
// no hardware CRC
// #define USE_CRC_EMU
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
#else // non-ARM*
MY_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
return v;
}
MY_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code
#ifdef Z7_CRC_HW_USE
#include "7zCrcEmu.h"
#endif
MY_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
#endif // non-ARM*
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
#if defined(Z7_CRC_HW_USE)
// #pragma message("USE ARM HW CRC")
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#ifdef MY_CPU_64BIT
#define CRC_HW_WORD_TYPE UInt64
#define CRC_HW_WORD_FUNC __crc32d
#else
#define CRC_HW_WORD_TYPE UInt32
#define CRC_HW_WORD_FUNC __crc32w
#endif
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
#ifdef ATTRIB_CRC
ATTRIB_CRC
#endif
// #pragma message("USE ARM HW CRC")
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
Z7_NO_INLINE
#ifdef Z7_CRC_HW_FORCE
UInt32 Z7_FASTCALL CrcUpdate
#else
static UInt32 Z7_FASTCALL CrcUpdate_HW
#endif
(UInt32 v, const void *data, size_t size)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_32_UNROLL_BYTES)
if (size >= CRC_HW_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_32_UNROLL_BYTES - 1);
size &= CRC_HW_UNROLL_BYTES - 1;
lim -= size;
do
{
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
p += 2 * sizeof(CRC_HW_WORD_TYPE);
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
p += 2 * sizeof(CRC_HW_WORD_TYPE);
}
while (p != lim);
}
@@ -187,136 +223,198 @@ UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, cons
return v;
}
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRC32
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#undef Z7_ARM_FEATURE_CRC32_WAS_SET
#endif
#endif // defined(Z7_CRC_HW_USE)
#endif // MY_CPU_LE
#ifndef Z7_CRC_HW_FORCE
#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
/*
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC)
(UInt32 v, const void *data, size_t size, const UInt32 *table);
Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate;
*/
static unsigned g_Crc_Algo;
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
static unsigned g_Crc_Be;
#endif
#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
Z7_NO_INLINE
#ifdef Z7_CRC_HW_USE
static UInt32 Z7_FASTCALL CrcUpdate_Base
#else
UInt32 Z7_FASTCALL CrcUpdate
#endif
(UInt32 crc, const void *data, size_t size)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
#if Z7_CRC_NUM_TABLES_USE == 1
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#else // Z7_CRC_NUM_TABLES_USE != 1
#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME
if (g_Crc_Algo == 1)
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#endif
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
#ifdef MY_CPU_LE
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#elif defined(MY_CPU_BE)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
#else
if (g_Crc_Be)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
else
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#endif
#endif // Z7_CRC_NUM_TABLES_USE != 1
}
if (size >= T0_64_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_64_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
#ifdef Z7_CRC_HW_USE
Z7_NO_INLINE
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size)
{
if (g_Crc_Algo == 0)
return CrcUpdate_HW(crc, data, size);
return CrcUpdate_Base(crc, data, size);
}
#endif
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // !defined(Z7_CRC_HW_FORCE)
#endif // MY_CPU_LE
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{
return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
}
MY_ALIGN(64)
UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL];
void MY_FAST_CALL CrcGenerateTable()
void Z7_FASTCALL CrcGenerateTable(void)
{
UInt32 i;
for (i = 0; i < 256; i++)
{
#if defined(Z7_CRC_HW_FORCE)
g_CrcTable[i] = __crc32b(i, 0);
#else
#define kCrcPoly 0xEDB88320
UInt32 r = i;
unsigned j;
for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
g_CrcTable[i] = r;
#endif
}
for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++)
{
UInt32 r = g_CrcTable[(size_t)i - 256];
const UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
}
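/* The loop above gives table t (entries T[t*256 + b]) the meaning
   "CRC state after byte b followed by t zero bytes":
     v = CRC_UPDATE_BYTE_2(0, b);
     t times: v = CRC_UPDATE_BYTE_2(v, 0);
   ends with v == T[t*256 + b].
   This invariant is what lets the sliced functions XOR one lookup per
   byte position and process 4/8/12 bytes per iteration. */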
#if CRC_NUM_TABLES < 4
g_CrcUpdate = CrcUpdateT1;
#else
#ifdef MY_CPU_LE
#if !defined(Z7_CRC_HW_FORCE) && \
(defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE))
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if Z7_CRC_NUM_TABLES_USE <= 1
g_Crc_Algo = 1;
#else // Z7_CRC_NUM_TABLES_USE <= 1
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
#ifdef MY_CPU_X86_OR_AMD64
if (!CPU_Is_InOrder())
#endif
g_CrcUpdate = CrcUpdateT8;
#endif
#else
#if defined(MY_CPU_LE)
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#else // !defined(MY_CPU_LE)
{
#ifndef MY_CPU_BE
#ifndef MY_CPU_BE
UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3)
{
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif
}
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
else if (p[0] != 1 || p[1] != 2)
g_CrcUpdate = CrcUpdateT1;
g_Crc_Algo = 1;
else
#endif
#endif // MY_CPU_BE
{
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--)
{
UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = CRC_UINT32_SWAP(x);
const UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = Z7_BSWAP32(x);
}
g_CrcUpdateT4 = CrcUpdateT1_BeT4;
g_CrcUpdate = CrcUpdateT1_BeT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif
#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#endif
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
g_Crc_Be = 1;
#endif
}
}
#endif
#endif
#endif // !defined(MY_CPU_LE)
#ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC
if (CPU_IsSupported_CRC32())
{
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate = CrcUpdateT0_64;
#endif
#ifdef MY_CPU_LE
#ifdef Z7_CRC_HW_USE
if (CPU_IsSupported_CRC32())
g_Crc_Algo = 0;
#endif // Z7_CRC_HW_USE
#endif // MY_CPU_LE
#endif // Z7_CRC_NUM_TABLES_USE <= 1
#endif // g_Crc_Algo was declared
}
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo)
{
if (algo == 0)
return &CrcUpdate;
#if defined(Z7_CRC_HW_USE)
if (algo == sizeof(CRC_HW_WORD_TYPE) * 8)
{
#ifdef Z7_CRC_HW_FORCE
return &CrcUpdate;
#else
if (g_Crc_Algo == 0)
return &CrcUpdate_HW;
#endif
}
#endif
#ifndef Z7_CRC_HW_FORCE
if (algo == Z7_CRC_NUM_TABLES_USE)
return
#ifdef Z7_CRC_HW_USE
&CrcUpdate_Base;
#else
&CrcUpdate;
#endif
#endif
return NULL;
}
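A hedged caller sketch for the selector above: nonzero algo values are only
available on matching builds and may yield NULL, while algo 0 always returns
&CrcUpdate; CrcCalc_WithAlgo is a hypothetical helper, not part of the source:

UInt32 CrcCalc_WithAlgo(unsigned algo, const void *data, size_t size)
{
  Z7_CRC_UPDATE_FUNC f = z7_GetFunc_CrcUpdate(algo);
  if (!f)
    f = z7_GetFunc_CrcUpdate(0);  /* guaranteed non-NULL fallback */
  return f(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
}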
#undef kCrcPoly
#undef Z7_CRC_NUM_TABLES_USE
#undef Z7_CRC_NUM_TABLES_TOTAL
#undef CRC_UPDATE_BYTE_2
#undef FUNC_NAME_LE_2
#undef FUNC_NAME_LE_1
#undef FUNC_NAME_LE
#undef FUNC_NAME_BE_2
#undef FUNC_NAME_BE_1
#undef FUNC_NAME_BE
#undef CRC_HW_UNROLL_BYTES
#undef CRC_HW_WORD_FUNC
#undef CRC_HW_WORD_TYPE
/* 7zCrc.h -- CRC32 calculation
2013-01-18 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef __7Z_CRC_H
#define __7Z_CRC_H
#ifndef ZIP7_INC_7Z_CRC_H
#define ZIP7_INC_7Z_CRC_H
#include "7zTypes.h"
@@ -11,14 +11,17 @@ EXTERN_C_BEGIN
extern UInt32 g_CrcTable[];
/* Call CrcGenerateTable one time before other CRC functions */
void MY_FAST_CALL CrcGenerateTable(void);
void Z7_FASTCALL CrcGenerateTable(void);
#define CRC_INIT_VAL 0xFFFFFFFF
#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size);
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo);
EXTERN_C_END
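A minimal usage sketch based on the declarations in this header (the two
buffer arguments are placeholders; crc_of_two_parts is a hypothetical helper):

#include "7zCrc.h"

static UInt32 crc_of_two_parts(const Byte *a, size_t an, const Byte *b, size_t bn)
{
  UInt32 v;
  CrcGenerateTable();            /* must run once before any CRC call */
  v = CRC_INIT_VAL;
  v = CrcUpdate(v, a, an);
  v = CrcUpdate(v, b, bn);
  return CRC_GET_DIGEST(v);      /* same result as CrcCalc over the whole buffer */
}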
......