wok view plop/stuff/unlz4.S @ rev 25647

Backout salvador 10% speedup: crashes with large files
author Pascal Bellard <pascal.bellard@slitaz.org>
date Fri Jan 26 12:15:51 2024 +0000 (9 months ago)
parents f2b4a9eb8bdd
children 1b965c2713aa
line source
1 // Lz4Decode: LZ4 decoder; the macros below choose register names and operand widths per build.
2 #ifndef FLAT32
3 // input ds:si=inStream, es:di=outStream (16-bit build, segment:offset pointers)
4 // output outStream[], ds:si, es:di
5 .code16
6 #define AX %ax
7 #define BX %bx
8 #define CX %cx
9 #define SI %si
10 #define DI %di
11 #else
12 // input esi=inStream, edi=outStream (FLAT32 build, 32-bit pointers)
13 // output outStream[], ds:esi, es:edi
14 .code32
15 #define AX %eax
16 #define BX %ebx
17 #define CX %ecx
18 #define SI %esi
19 #define DI %edi
20 #endif
22 #if defined(FLAT16OUT) || defined(ONLY8086)
23 #define cAX %ax
24 #define cCX %cx
25 #define cDX %dx
26 #else
27 #define cAX %eax
28 #define cCX %ecx
29 #define cDX %edx
30 #endif
32 #define ARCHIVE_MAGICNUMBER 0x184C2102 // lz4main skips this value when it appears where a chunk size is read; presumably the LZ4 legacy frame magic - confirm against the LZ4 frame format
34 //#define PARANOIA // cover rare cases, optional
36 lz4main: // read the next chunk size into the chunk counter (dx with FLAT16OUT, bp:dx with ONLY8086 alone, edx otherwise), skipping archive magic numbers, then fall into lz4chunk
37 #ifdef PARANOIA
38 cld
39 # if !defined(FLAT32) && !defined(FLAT16OUT)
40 xorw %cx, %cx // zero count: lz4mov copies nothing and only normalizes ds:si and es:di
41 call lz4mov // NOTE(review): lz4mov is assembled only when NeedLz4mov is defined, which FLAT16 together with PARANOIA prevents - confirm that combination is never built
42 # endif
43 #endif
44 #if defined(FLAT16OUT) || defined(ONLY8086)
45 lodsw // get chunkSize (low word)
46 # ifndef NO_LZ4_HEADER
47 cmpw $ARCHIVE_MAGICNUMBER&0xFFFF, %ax
48 # endif
49 xchg %ax, %dx // dx = low word; xchg leaves the cmpw flags untouched
50 lodsw // high word; lods leaves the flags untouched as well
51 # ifndef NO_LZ4_HEADER
52 jne chkeof // low word is not the magic number
53 cmpw $ARCHIVE_MAGICNUMBER>>16, %ax
54 je lz4main // both words matched the magic number: read the next dword instead
55 chkeof:
56 # endif
57 # ifdef PARANOIA
pushw %dx // the or below is wanted for its flags only; keep the low word intact
58 orw %ax, %dx // end of file ?
popw %dx // pop does not modify ZF
59 je lz4quit
60 # endif
61 # ifndef FLAT16OUT
62 xchgw %ax, %bp // bp = high word of the chunk size
63 subw $1, %dx // bp:dx = chunkSize - 1
64 sbbw $0, %bp
65 # endif
66 #else
67 lodsl // get chunkSize
68 # ifndef NO_LZ4_HEADER
69 cmpl $ARCHIVE_MAGICNUMBER, %eax
70 je lz4main // magic number: read the next dword instead
71 # endif
72 # ifdef PARANOIA
73 orl %eax, %eax // end of file ?
74 je lz4quit
75 # endif
76 xchgl %eax, %edx // edx = remaining chunk size
77 #endif
78 lz4chunk: // uncompress chunk: one pass per sequence (token, literals, match offset, match), until the chunk counter runs out
79 lodsb // get token
80 pushw %ax // keep the token; it is popped into bx after the literal copy
81 #ifdef ONLY8086
82 movb $4, %cl // shift by cl: the immediate form is not used on ONLY8086 builds
83 shrb %cl, %al
84 #else
85 shrb $4, %al // high nibble of the token = literal length field
86 #endif
87 call lz4len // get literal length
88 #if !defined(FLAT16OUT) && defined(ONLY8086)
89 subw %cx, %dx // count literal
90 sbbw $0, %bp
91 #else
92 sub cCX, cDX // count literal
93 #endif
94 #if !defined(FLAT32) && !defined(FLAT16OUT) && (!defined(FLAT16) || !defined(PARANOIA))
95 #define NeedLz4mov
96 call lz4mov // copy literals
97 #else
98 rep movsb
99 #endif
100 popw %bx // bl = token
101 #if !defined(FLAT16OUT) && defined(ONLY8086)
102 subw $1+2, %dx // count token & string address
103 sbbw $0, %bp
104 jb lz4quit // counter went below zero: chunk finished
105 lz4cont: // not referenced anywhere in this view
106 #else
107 sub $1+2, cDX // count token & string address
108 jbe lz4quit // counter reached zero or went below: chunk finished
109 #endif
110 lodsw // get string address
111 xchg AX, BX // BX = match offset, al = token
112 call lz4len // get string length
113 add $4, CX // the stored match length is biased by 4
114 #if !defined(FLAT32) && !defined(FLAT16OUT)
115 pushw %ds // save the input pointer ds:si across the match copy
116 pushw %si
117 movw %di, %si
118 subw %bx, %si // si = di - offset
119 movw %es, %ax
120 jnc axok // mov keeps the borrow flag produced by the subw
121 subb $0x10, %ah // di - offset wrapped: move the source segment back by 0x1000 paragraphs (64 KiB)
122 axok:
123 .macro norm reg // ds:si = normalized ax:si, es:di = normalized es:di, offsets reduced to 0..15; \reg and ax are scratch
124 movw %si, \reg
125 andw $0xF, %si
126 # ifdef ONLY8086
127 pushw %cx // cl is needed as the shift count
128 movb $4, %cl
129 shrw %cl, \reg
130 # else
131 shrw $4, \reg
132 # endif
133 addw \reg, %ax // fold the upper 12 offset bits into the source segment
134 movw %ax, %ds
135 movw %di, \reg
136 andw $0xF, %di
137 # ifdef ONLY8086
138 shrw %cl, \reg
139 popw %cx
140 # else
141 shrw $4, \reg
142 # endif
143 movw %es, %ax
144 addw \reg, %ax // same folding for the destination segment
145 movw %ax, %es
146 .endm
147 # if !defined (NeedLz4mov)
148 norm %bx
149 rep movsb
150 # else
151 call lz4movStr // copy string
152 # endif
153 popw %si
154 popw %ds
jmp lz4chunk // next sequence; without this jump execution would run on into the copy helpers that follow
155 #else
156 # ifdef FASTFILL
157 cmp $FASTFILL,BX
158 jbe lz4fast // short offsets are filled with stos instead of being copied
159 # endif
160 lz4notfast:
161 xchg AX, SI // park the input pointer in AX
162 mov DI, SI
163 sub BX, SI // SI = DI - offset: start of the match inside the output
164 # ifdef ONLY8086
165 lz4movsb:
166 movsb %es:(SI), %es:(DI) // NMOS 8088/8086 workaround.
167 loop lz4movsb
168 # else
169 rep movsb %es:(SI), %es:(DI)
170 # endif
171 xchg AX, SI // restore the input pointer
172 # ifdef FASTFILL
173 lz4fast: // also reached by fall-through from the copy above with CX = 0, in which case nothing is stored
174 # if FASTFILL == 1
175 movb %es:-1(DI), %al // offset 1: repeat the last output byte
176 rep stosb
177 # endif
178 # if FASTFILL >= 2
179 movw %es:-2(DI), %ax // al = byte at DI-2, ah = byte at DI-1
180 je lz4fastword // flags still come from cmp $FASTFILL,BX: offset equals FASTFILL, repeat the byte pair
181 movb %ah, %al // smaller offset: repeat the most recent byte (held in ah) in both halves
182 lz4fastword:
183 shr $1, CX // word count; carry = odd trailing byte
184 rep stosw
185 jnc lz4chunk
186 stosb // odd length: one more byte from al
187 # endif
188 # endif
189 jmp lz4chunk
190 #endif
192 #if defined(NeedLz4mov)
193 # if defined(PARANOIA)
194 lz4movlp: // count has ch == 0xFF: copy 0xF000 bytes first, then fall into lz4mov to re-normalize and copy the rest
195 xchgw %ax, %cx // park the byte count in ax
196 movw $0x7800, %cx // 0x7800 words = 0xF000 bytes
197 rep movsw
198 xchgw %ax, %cx // byte count back in cx
199 sub $0xF0, %ch // count -= 0xF000
200 # endif
201 lz4mov: // copy cx bytes from ds:si to es:di after normalizing both pointers; ax and bx are clobbered
202 movw %ds, %ax // norm takes the source segment in ax
203 lz4movStr: // same copy, but the caller supplies the source segment in ax
204 norm %bx
205 # if defined(PARANOIA)
206 cmp $0xFF, %ch // catch FFFX case
207 jz lz4movlp
208 # endif
209 rep movsb
210 ret
211 #endif
213 lz4len: // get length in %ecx (cCX): the low nibble of al, plus extension bytes read from the input when that nibble is 0xF
214 and $0xF, cAX // keep the 4-bit length field, clear the rest of cAX
215 mov cAX, cCX
216 cmpb $0xF, %al
217 jne lz4quit // field below 0xF: the length is complete, return
218 lz4len2: // add extension bytes; the byte that differs from 0xFF is added too and ends the loop
219 lodsb
220 #if !defined(FLAT16OUT) && defined(ONLY8086)
221 subw $1, %dx // remaining chunk size
222 sbbw $0, %bp
223 #else
224 dec cDX // remaining chunk size
225 #endif
226 add AX, CX // cAX was masked to 0..15 above and lodsb replaces only al
227 cmpb $0xFF, %al
228 je lz4len2
229 lz4quit: // shared return; also the exit target used by lz4main and lz4chunk
230 ret