wok-tiny diff linux/stuff/unlzsa1.S @ rev 179
linux: rewrite stuff/pack
author | Pascal Bellard <pascal.bellard@slitaz.org> |
---|---|
date | Tue Aug 15 09:20:55 2023 +0000 (9 months ago) |
parents | 2c80994c5e30 |
children |
line diff
1.1 --- a/linux/stuff/unlzsa1.S Wed Jul 14 14:20:00 2021 +0000 1.2 +++ b/linux/stuff/unlzsa1.S Tue Aug 15 09:20:55 2023 +0000 1.3 @@ -1,143 +1,211 @@ 1.4 -// based on 1.5 -// decompress_small.S - space-efficient decompressor implementation for 8088 1.6 -// 1.7 -// Copyright (C) 2019 Emmanuel Marty 1.8 -// 1.9 -// This software is provided 'as-is', without any express or implied 1.10 -// warranty. In no event will the authors be held liable for any damages 1.11 -// arising from the use of this software. 1.12 -// 1.13 -// Permission is granted to anyone to use this software for any purpose, 1.14 -// including commercial applications, and to alter it and redistribute it 1.15 -// freely, subject to the following restrictions: 1.16 -// 1.17 -// 1. The origin of this software must not be misrepresented; you must not 1.18 -// claim that you wrote the original software. If you use this software 1.19 -// in a product, an acknowledgment in the product documentation would be 1.20 -// appreciated but is not required. 1.21 -// 2. Altered source versions must be plainly marked as such, and must not be 1.22 -// misrepresented as being the original software. 1.23 -// 3. This notice may not be removed or altered from any source distribution. 1.24 - 1.25 -// --------------------------------------------------------------------------- 1.26 -// Decompress raw LZSA1 block 1.27 -// inputs: 1.28 -// * %ds:%si: raw LZSA1 block 1.29 -// * %es:%di: output buffer 1.30 -// --------------------------------------------------------------------------- 1.31 - 1.32 +// Lzsa1Decode: 1.33 +#ifndef FLAT32 1.34 +// input ds:si=inStream, es:di=outStream 1.35 +// output outStream[], ds:si, es:di 1.36 .code16 1.37 -lzsa1_decompress: 1.38 - //pushw %di // remember decompression offset 1.39 - //cld // make string operations (lods, movs, stos..) move forward 1.40 - 1.41 -lzsa1_decode_token: 1.42 - xorb %ah, %ah // clear %ah 1.43 - lodsb // read token byte: O|LLL|MMMM 1.44 - movw %ax,%bx // keep token in %bl 1.45 - 1.46 - andb $0x70, %al // isolate literals length in token (LLL) 1.47 - je lzsa1_check_offset_size // if LLL=0, we have no literals; goto match 1.48 -#ifdef ONLY8086 1.49 - movb $4, %cl 1.50 - shrb %cl, %al // shift literals length into place 1.51 +#define AX %ax 1.52 +#define BX %bx 1.53 +#define SI %si 1.54 +#define DI %di 1.55 #else 1.56 - shrb $4, %al // shift literals length into place 1.57 +// input esi=inStream, edi=outStream 1.58 +// output outStream[], ds:esi, es:edi 1.59 + .code32 1.60 +#define AX %eax 1.61 +#define BX %ebx 1.62 +#define SI %esi 1.63 +#define DI %edi 1.64 #endif 1.65 1.66 - cmpb $7, %al // LITERALS_RUN_LEN? 1.67 - jne lzsa1_got_literals // no, we have the full literals count from the token, go copy 1.68 +MATCH_RUN_LEN = 15 1.69 +LITERALS_RUN_LEN = 7 1.70 +MIN_MATCH_SIZE = 3 1.71 +MIN_LITERALS_SIZE = 0 1.72 1.73 - lodsb // grab extra length byte 1.74 - addb $7, %al // add LITERALS_RUN_LEN 1.75 - jnc lzsa1_got_literals // if no overflow, we have the full literals count, go copy 1.76 - je lzsa1_big_literals 1.77 +#define PACKED_ONLY // assume no copy block, optional 1.78 +//#define PARANOIA // cover rare cases, optional 1.79 + 1.80 +.macro shrclw cnt,obj 1.81 +#ifdef ONLY8086 1.82 + movb \cnt, %cl 1.83 + shrw %cl, \obj 1.84 +#else 1.85 + shrw \cnt, \obj 1.86 +#endif 1.87 +.endm 1.88 1.89 - movb $1, %ah // add 256 (I'd prefer 'xchgb %al, %ah' max 1791 instead of 511) 1.90 - lodsb // grab single extra length byte 1.91 - .byte 0x3C // mask lodsw with cmpb $0xAD, %al 1.92 - // (*like jmp short lzsa1_got_literals but faster) 1.93 - 1.94 -lzsa1_big_literals: 1.95 - lodsw // grab 16-bit extra length 1.96 - 1.97 -lzsa1_got_literals: 1.98 +#ifdef FLAT16OUT 1.99 +#define RAW_FORMAT 1.100 +#endif 1.101 +lzsa1main: 1.102 +#ifdef PARANOIA 1.103 + cld 1.104 +#endif 1.105 +#ifndef RAW_FORMAT 1.106 +# ifndef NO_LZSA1_HEADER 1.107 + lodsw 1.108 + cmpw $0x9E7B, %ax // magic 1.109 + jne lzsa1main 1.110 + lodsb 1.111 +// cmpb $0, %al // lzsa1 1.112 +// jne lzsa1main 1.113 +# endif 1.114 + xorw %ax, %ax 1.115 + xchgw %ax, %di 1.116 + shrclw $4, %ax 1.117 + jmp lzsa1blockz // %di *MUST* be paragraph aligned 1.118 +# ifndef PACKED_ONLY 1.119 +lzsa1copy: 1.120 + movsb // handle 64K case 1.121 + decw %cx 1.122 + rep movsb // copy block 1.123 +# endif 1.124 +lzsa1block: // uncompress chunk 1.125 + movw $0x1000, %ax 1.126 +lzsa1blockz: 1.127 + movw %es, %bx 1.128 + addw %ax, %bx 1.129 + movw %bx, %es 1.130 +# ifndef FLAT16 1.131 + movw %si, %ax 1.132 + andw $0xf, %si 1.133 + shrclw $4, %ax 1.134 + movw %ds, %bx 1.135 + addw %ax, %bx 1.136 + movw %bx, %ds 1.137 +# endif 1.138 + lodsw // block size 1.139 xchgw %ax, %cx 1.140 -#ifdef USE_MOVSW 1.141 - shrw $1, %cx 1.142 - rep movsw 1.143 - adcw %cx, %cx 1.144 + movw %cx, %dx 1.145 + lodsb 1.146 +# ifndef PACKED_ONLY 1.147 + orb %al, %al 1.148 + js lzsa1copy 1.149 + jne lzsa1full // 64Kb block 1.150 +# endif 1.151 + jcxz lzsa1quit // bail if we hit EOD 1.152 +lzsa1full: 1.153 + addw %si, %dx 1.154 #endif 1.155 - rep movsb // copy %cx literals from %ds:%si to %es:%di 1.156 - 1.157 -lzsa1_check_offset_size: 1.158 +lzsa1chunk: // uncompress chunk 1.159 + lodsb // get token O|LLL|MMMM 1.160 + movb %al, %bl // keep token in bl 1.161 + shrclw $4, %ax // shift literals length into place 1.162 + movw $LITERALS_RUN_LEN+256*MIN_LITERALS_SIZE, %cx 1.163 + call lzsa1len // %ch = LITERALS_RUN_LEN 1.164 + rep movsb // copy %cx literals from %ds:%si to %es:%di 1.165 +#ifndef RAW_FORMAT 1.166 + cmpw %dx, %si 1.167 + je lzsa1block // bail if we hit EOD 1.168 +#endif 1.169 +#ifdef FLAT32 1.170 + orl $-1, %eax 1.171 +#endif 1.172 testb %bl, %bl // check match offset size in token (O bit) 1.173 - js lzsa1_get_long_offset 1.174 - 1.175 - decw %cx 1.176 - xchgw %ax, %cx // %ah to 0xff - %cx was zero from the rep movsb above 1.177 + js lzsa1LongOfs 1.178 +#ifndef FLAT32 1.179 + movb $-1, %ah // set offset bits 15-8 to 1 1.180 +#endif 1.181 lodsb 1.182 .byte 0x3C // mask lodsw with cmpb $0xAD, %al 1.183 - // (*like jmp short lzsa1_get_match_length but faster) 1.184 - 1.185 -lzsa1_get_long_offset: 1.186 - lodsw // Get 2-byte match offset 1.187 - 1.188 -lzsa1_get_match_length: 1.189 - xchgw %ax, %bx // %bx: match offset %ax: original token 1.190 - andb $0xF, %al // isolate match length in token (MMMM) 1.191 - addb $3, %al // add MIN_MATCH_SIZE 1.192 - 1.193 - cmpb $0x12, %al // MATCH_RUN_LEN? 1.194 - jne lzsa1_got_matchlen // no, we have the full match length from the token, go copy 1.195 - 1.196 - lodsb // grab extra length byte 1.197 - addb $0x12, %al // add MIN_MATCH_SIZE + MATCH_RUN_LEN 1.198 - jnc lzsa1_got_matchlen // if no overflow, we have the entire length 1.199 - je lzsa1_big_matchlen 1.200 - 1.201 - movb $1, %ah // add 256 (I'd prefer 'xchgb %al, %ah' max 3071 instead of 511) 1.202 - lodsb // grab single extra length byte 1.203 - .byte 0x3C // mask lodsw with cmpb $0xAD, %al 1.204 - // (*like jmp short lzsa1_got_matchlen but faster) 1.205 -lzsa1_big_matchlen: 1.206 - lodsw // grab 16-bit length 1.207 - 1.208 -lzsa1_got_matchlen: 1.209 - xchgw %ax, %cx // copy match length into %cx 1.210 - jcxz lzsa1_done_decompressing // bail if we hit EOD 1.211 - xchgw %ax, %si // save %si (current pointer to compressed data) 1.212 - leaw (%bx,%di), %si // %es:%si now points at back reference in output data 1.213 -#ifdef USE_MOVSW 1.214 - cmpw $-2, %bx 1.215 - jae lzsa1_store 1.216 - shrw $1, %cx 1.217 - rep movsw %es:(%si), %es:(%di) 1.218 - adcw %cx, %cx 1.219 -#endif 1.220 - rep movsb %es:(%si), %es:(%di) // copy match 1.221 - xchgw %ax, %si // restore %ds:%si 1.222 - jmp lzsa1_decode_token // go decode another token 1.223 -#ifdef USE_MOVSW 1.224 -lzsa1_store: 1.225 - je lzsa1_store_word 1.226 - lodsb %es:(%si) 1.227 - movb %al, %ah 1.228 - .byte 0x3D // mask lodsw with cmpb $0x26AD, %ax 1.229 - // (*like jmp short lzsa1_store_byte but faster) 1.230 -lzsa1_store_word: 1.231 - lodsw %es:(%si) 1.232 -lzsa1_store_byte: 1.233 - shrw $1, %cx 1.234 - rep stosw 1.235 - adcw %cx, %cx 1.236 - rep stosb 1.237 - xchgw %ax, %si // restore %ds:%si 1.238 - jmp lzsa1_decode_token // go decode another token 1.239 +lzsa1LongOfs: 1.240 + lodsw 1.241 + xchg AX, BX // %bx: match offset %ax: original token 1.242 + movw $MATCH_RUN_LEN+256*MIN_MATCH_SIZE, %cx 1.243 + call lzsa1len 1.244 +#ifdef RAW_FORMAT 1.245 + jcxz lzsa1quit // bail if we hit EOD 1.246 #endif 1.247 1.248 -lzsa1_done_decompressing: 1.249 -// popw %ax // retrieve the original decompression offset 1.250 -// xchgw %ax, %di // compute decompressed size 1.251 -// subw %di, %ax 1.252 - ret // done 1.253 +#if !defined(FLAT16OUT) && !defined(FLAT32) 1.254 + xchg AX, SI // save %si 1.255 + lea (BX,DI), SI 1.256 + pushw %ds 1.257 + movw %es, %bp 1.258 + cmpw %si, %di 1.259 + jnc lzsa1sameSeg 1.260 + pushw %si 1.261 +# ifdef ONLY8086 1.262 + pushw %cx 1.263 +# endif 1.264 + shrclw $4, %si 1.265 +# ifdef ONLY8086 1.266 + popw %cx 1.267 +# endif 1.268 + lea -4096(%bp,%si), %bp 1.269 + popw %si 1.270 + andw $0xF, %si 1.271 +lzsa1sameSeg: 1.272 + movw %bp, %ds 1.273 +# ifdef FASTFILL 1.274 + cmp $-FASTFILL,BX 1.275 + jae lzsa1fast 1.276 +# endif 1.277 + rep movsb 1.278 +lzsa1chunkz: 1.279 + popw %ds 1.280 +#else 1.281 +# ifdef FASTFILL 1.282 + cmp $-FASTFILL,BX 1.283 + jae lzsa1fast 1.284 +# endif 1.285 + xchg AX, SI // save %si 1.286 + lea (BX,DI), SI 1.287 +# ifdef ONLY8086 1.288 +lzsa2movsb: 1.289 + movsb %es:(SI), %es:(DI) // NMOS 8088/8086 workaround. 1.290 + loop lzsa2movsb 1.291 +# else 1.292 + rep movsb %es:(SI), %es:(DI) 1.293 +# endif 1.294 +#define lzsa1chunkz lzsa1chunk 1.295 +#endif 1.296 + xchg AX, SI // restore %si 1.297 + jmp lzsa1chunk 1.298 +#ifdef FASTFILL 1.299 +lzsa1fast: 1.300 +# if FASTFILL == 1 1.301 +# if !defined(FLAT16OUT) && !defined(FLAT32) 1.302 + lodsb 1.303 +# else 1.304 + movb %es:(BX,DI), %al 1.305 +# endif 1.306 + rep stosb 1.307 +# endif 1.308 +# if FASTFILL == 2 1.309 +# if !defined(FLAT16OUT) && !defined(FLAT32) 1.310 + lodsw 1.311 +# else 1.312 + movw %es:(BX,DI), %ax 1.313 +# endif 1.314 + je lzsa1fastword 1.315 + movb %ah, %al 1.316 +lzsa1fastword: 1.317 + shr $1, CX 1.318 + rep stosw 1.319 + jnc lzsa1chunkz 1.320 + stosb 1.321 +# endif 1.322 + jmp lzsa1chunkz 1.323 +#endif 1.324 + 1.325 +lzsa1len: // get length in %ecx 1.326 + andb %cl, %al 1.327 + cbw // clear %ah 1.328 + cmpb %al, %cl 1.329 + jne lzsa1minNumber // S=0-6, L=0-14 %cx = %ch + %al if (%al & %cl != %cl) 1.330 + addb %al, %ch 1.331 + lodsb 1.332 +lzsa1minNumber: 1.333 + addb %ch, %al 1.334 + jnc lzsa1gotNumber // 0-255 %cx = %ch + %cl + byte if (%al & %cl == %cl && %ch + %cl + byte < 0x100) 1.335 + movb %al, %ah // S=256-1791, L=256-4607 or S=256-511, L=256-511 1.336 + jne lzsa1midNumber 1.337 + lodsw // 0-65535 %cx = word if (%al & %cl == %cl && %ch + %cl + byte == 0x100) 1.338 + .byte 0x3C // mask lodsb with cmpb $0xAC, %al 1.339 +lzsa1midNumber: 1.340 + lodsb // %cx = (%ch + %cl + byte)*256 + byte2 if (%al & %cl == %cl && %ch + %cl + byte > 0x100) 1.341 +lzsa1gotNumber: 1.342 + xchgw %ax, %cx 1.343 +lzsa1quit: 1.344 + ret