wok-tiny diff x86test/stuff/unlzma.S @ rev 172

Add x86test & tfttest
author Pascal Bellard <pascal.bellard@slitaz.org>
date Sat May 08 17:23:19 2021 +0000 (2021-05-08)
parents
children eb617e43dc08
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/x86test/stuff/unlzma.S	Sat May 08 17:23:19 2021 +0000
     1.3 @@ -0,0 +1,1023 @@
     1.4 +// #define RC_NORMALIZE if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
     1.5 +//
     1.6 +// #define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
     1.7 +// #define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
     1.8 +// #define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
     1.9 +//
    1.10 +//#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
    1.11 +//  { UpdateBit0(p); mi <<= 1; A0; } else \
    1.12 +//  { UpdateBit1(p); mi = (mi + mi) + 1; A1; }
    1.13 +//
    1.14 +// #define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)
    1.15 +//
    1.16 +// #define RangeDecoderBitTreeDecode(probs, numLevels, res) \
    1.17 +//  { int i = numLevels; res = 1; \
    1.18 +//  do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
    1.19 +//  res -= (1 << numLevels); }
    1.20 +/*
    1.21 + * Compress the input with: lzma e src dst -eos -pb0 -lp0 -lc3 (options must match PROP_PB/PROP_LP/PROP_LC below)
    1.22 + */
    1.23 +
    1.24 +#define PROP_PB 0
    1.25 +#define PROP_LP 0
    1.26 +#define PROP_LC 3
    1.27 +#define PROPS (PROP_LC+(PROP_LP*9)+(PROP_PB*45))
    1.28 +
    1.29 +// static const Byte *Buffer;
    1.30 +// static UInt32 bound, Code, Range;
    1.31 +
    1.32 +/*
    1.33 + * Buffer register DS:SI
    1.34 + * all var based ws=ss:bp
    1.35 + */
    1.36 +
    1.37 +rep0		=	-4		// long, bp-relative offset; initlocals sets it to 1
    1.38 +rep1		=	rep0-4		// long
    1.39 +rep2		=	rep0-8		// long
    1.40 +rep3		=	rep0-12		// long
    1.41 +state		=	-17		// byte, 0..11
    1.42 +posState 	=	state-1		// byte, 0..15 (only accessed when PROP_PB != 0)
    1.43 +posState2 	=	posState-1	// byte, 0..15 (only accessed when PROP_LP != 0)
    1.44 +scratched	=	rep0-16		// byte = 1
    1.45 +Code		=	-24		// long, range decoder Code
    1.46 +outStream	=	-28		// long, output address (linear es*16+di in segmented builds)
    1.47 +nowPos		=	outStream	// long, alias of outStream: the slot is incremented per output byte
    1.48 +Range		=	Code-8		// long, range decoder Range (bp-32)
    1.49 +#define LOCALS		32
    1.50 +
    1.51 +// int LzmaDecode(CLzmaDecoderState *vs,
    1.52 +//     const unsigned char *inStream, 
    1.53 +//     unsigned char *outStream)
    1.54 +// {
    1.55 +//   CProb *p = vs->Probs;
    1.56 +//   SizeT nowPos = 0;
    1.57 +//   #define posStateMask = (1 << (vs->Properties.pb)) - 1;
    1.58 +//   #define literalPosMask = (1 << (vs->Properties.lp)) - 1;
    1.59 +//   int lc = vs->Properties.lc, state = 0, len = 0;
    1.60 +//   UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
    1.61 +// 
    1.62 +//   {
    1.63 +//     UInt32 i, numProbs = Literal /*1846*/
    1.64 +// 	    + ((UInt32)LZMA_LIT_SIZE /*768*/ << (lc + vs->Properties.lp));
    1.65 +//     for (i = 0; i < numProbs; i++) p[i] = kBitModelTotal /*2048*/ >> 1;
    1.66 +
    1.67 +#define WS (1846+(768<<(PROP_LC+PROP_LP)))
    1.68 +#if (WS+WS+LOCALS) >= 65000
    1.69 +/* MAX WS = (1846+(768<<(8+4))) > 3MB! */
    1.70 +#error invalid (lc,lp,pb) : out of memory
    1.71 +#endif
    1.72 +
    1.73 +ws1	=	WS			// number of 16-bit probability slots
    1.74 +ws2	=	ws1*2			// size of the probability table in bytes
    1.75 +ws	=	ws2+LOCALS+15		// NOTE(review): not referenced in the code shown here; presumably the workspace size for callers - confirm
    1.76 +
    1.77 +#ifndef FLAT32
    1.78 +#define	AX	%ax
    1.79 +#define	BX	%bx
    1.80 +#define	CX	%cx
    1.81 +#define	DX	%dx
    1.82 +#define	SI	%si
    1.83 +#define	DI	%di
    1.84 +#define	BP	%bp
    1.85 +#define	SP	%sp
    1.86 +#define CWD	cwd
    1.87 +#else
    1.88 +#define	AX	%eax
    1.89 +#define	BX	%ebx
    1.90 +#define	CX	%ecx
    1.91 +#define	DX	%edx
    1.92 +#define	SI	%esi
    1.93 +#define	DI	%edi
    1.94 +#define	BP	%ebp
    1.95 +#define	SP	%esp
    1.96 +#define CWD	cdq
    1.97 +#endif
    1.98 +/*
    1.99 + * LzmaDecode:
   1.100 +#ifndef FLAT32
   1.101 + *   input   ds:si=inStream, es:di=outStream
   1.102 + *   output  outStream[], ds:si, es:di
   1.103 + 	.code 16
   1.104 +#else
   1.105 + *   input   esi=inStream, edi=outStream
   1.106 + *   output  outStream[], esi, edi
   1.107 + 	.code 32
   1.108 +#endif
   1.109 + */
   1.110 + 
   1.111 +	mov	$ws1, CX		// LzmaDecode entry: cx = number of probabilities to create
   1.112 +#ifdef ONLY8086
   1.113 +	movw	$2048/2, %ax
   1.114 +lzd1:
   1.115 +	pushw	%ax
   1.116 +#else
   1.117 +lzd1:
   1.118 +	pushw	$2048/2
   1.119 +#endif
   1.120 +	loop	lzd1			// p[i] = kBitModelTotal >> 1, table built on the stack
   1.121 +	mov	SP, BP			// bp = &p[0]; the locals are pushed below it (negative offsets)
   1.122 +	movb	$((LOCALS+3)/4)*2, %cl	// push count (ch is 0 after the loop above)
   1.123 +#ifdef ONLY8086
   1.124 +	movw	$1, %ax
   1.125 +	cwd				// dx = 0, so dx:ax = 1
   1.126 +initlocals:
   1.127 +	pushw	%dx
   1.128 +	pushw	%ax
   1.129 +#else
   1.130 +initlocals:
   1.131 +	pushl	$1
   1.132 +#endif
   1.133 +	loop	initlocals		// every local long = 1: rep0..rep3 = 1, state/posState = 0, Range = 1
   1.134 +
   1.135 +#if !defined(FLAT32) && !defined(FLAT16OUT)
   1.136 +	movb	$4, %cl
   1.137 +	movw	%es, %bx
   1.138 +	shrw	%cl, %bx		// bh = es >> 12
   1.139 +	movw	%es, %dx
   1.140 +	shlw	%cl, %dx		// dx = low word of es * 16
   1.141 +	addw	%dx, %di
   1.142 +	movw	%di, outStream(%bp)
   1.143 +	adcb	%bh, outStream+2(%bp)	// outStream = es * 16 + di as a linear address
   1.144 +	incw	%cx			// cx = 5 for setrep
   1.145 +#else
   1.146 +	movb	$5, %cl			// cx = 5 for setrep
   1.147 +	mov	DI, outStream(BP)
   1.148 +#endif
   1.149 +
   1.150 +//   Byte previousByte = 0;
   1.151 +	xor	BX, BX
   1.152 +
   1.153 +// #define RC_INIT(buffer) 
   1.154 +//    Buffer = buffer; Code = 0; Range = 0xFFFFFFFF; 
   1.155 +//    { int i; for(i=0; i<5; i++) { Code = (Code<<8) | RC_READ_BYTE; }}
   1.156 +//   }
   1.157 +//   RC_INIT(inStream);
   1.158 +
   1.159 +#ifndef NO_LZMA_HEADER
   1.160 +#ifdef CHECK_LZMA_HEADER
   1.161 +	cmp.w	$0x5A4C, (SI)	// lzip header ('LZIP' version:1 dicobits:1)
   1.162 +	je	lzip_header
   1.163 +	cmp.w	$0x5D, (SI)	// lzma header (0x5D dicosz:4 orgsz:8)
   1.164 +	jne	no_header
   1.165 +	add	$13-6, SI	// skip lzma header (the remaining 6 bytes are added below)
   1.166 +lzip_header:
   1.167 +	add	$6, SI		// skip lzip header
   1.168 +no_header:
   1.169 +#else
   1.170 +	add	$13, SI		// skip lzma header (0x5D dicosz:4 orgsz:8)
   1.171 +#endif
   1.172 +#endif
   1.173 +setrep:
   1.174 +	call	RC_LOAD_BYTE		// Range <<= 8; Code = (Code << 8) | next input byte
   1.175 +	decb	Range(BP)		// low byte 0x00 -> 0xFF; Range is 0xFFFFFFFF after the loop
   1.176 +	loop	setrep			// 5 iterations
   1.177 +
   1.178 +lzdmainlp:
   1.179 +//   while(1) {
   1.180 +//     CProb *prob;
   1.181 +//     int posState = (int)((nowPos) & posStateMask);
   1.182 +// 
   1.183 +//     prob = p + IsMatch /*0*/ + (state << kNumPosBitsMax /*4*/) + posState;
   1.184 +//     if (Bit0(prob)) { /* char */
   1.185 +
   1.186 +	xor	DX, DX
   1.187 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.188 +	mov	$state, DI	// di = offset of state, so (BP, DI) addresses it below
   1.189 +	jc	lzdstring
   1.190 +
   1.191 +//       prob = p + Literal /*1846*/ + (LZMA_LIT_SIZE /*768*/ * 
   1.192 +// 	((((nowPos) & literalPosMask) << lc) + (previousByte >> (8 - lc))));
   1.193 +
   1.194 +#if PROP_LC != 0
   1.195 +# ifdef ONLY8086
   1.196 +	movb	$8-PROP_LC, %cl
   1.197 +	shrb	%cl, %bl
   1.198 +# else
   1.199 +	shrb	$8-PROP_LC, %bl
   1.200 +# endif
   1.201 +#else
   1.202 +	xor	%bx,%bx
   1.203 +#endif
   1.204 +
   1.205 +#if PROP_LP != 0
   1.206 +	movb	posState2(BP), %dl
   1.207 +# if PROP_LC != 0
   1.208 +#  ifdef ONLY8086
   1.209 +	movb	$PROP_LC, %cl
   1.210 +	shl	%cl, DX
   1.211 +#  else
   1.212 +	shl	$PROP_LC, DX
   1.213 +#  endif
   1.214 +	movb	$0, %bh
   1.215 +# endif
   1.216 +	add	BX, DX		// NOTE(review): the sum lands in DX but the mul below reads BX - confirm before enabling PROP_LP
   1.217 +#endif
   1.218 +
   1.219 +	movb	$3, %ah
   1.220 +	mul	BX		// ax = 0x300 * bx = LZMA_LIT_SIZE * bx (al is 0: Bit1state returned 0)
   1.221 +	add	$1846, AX
   1.222 +
   1.223 +//       int symbol = 1;
   1.224 +
   1.225 +	CWD			// dx = 0 (ax is below 0x8000, see the WS size check)
   1.226 +	inc	DX		// symbol = 1
   1.227 +	xchg	AX, CX		// save prob
   1.228 +
   1.229 +//       if (state >= kNumLitStates /*7*/) { /* previous was string */
   1.230 +//       if (state < 4) state = 0;
   1.231 +
   1.232 +lzd6z:
   1.233 +	subb	$3, (BP, DI)	// state -= 3
   1.234 +
   1.235 +//       if (state < 4) state = 0;
   1.236 +
   1.237 +	jnc	lzd6
   1.238 +	movb	%dh, (BP, DI)	// %dh = 0
   1.239 +
   1.240 +lzd6:
   1.241 +//       else if (state < 10) state -= 3;
   1.242 +
   1.243 +	cmpb	$10-3, (BP, DI)
   1.244 +	
   1.245 +//       else state -= 6;
   1.246 +
   1.247 +	jnb	lzd6z		// still >= 7: the old state was >= 10, subtract 3 once more
   1.248 +	cmpb	$7-3-1, (BP, DI)
   1.249 +	jbe	lzd3		// new state <= 3: the old state was < 7, no matchByte decoding
   1.250 +	
   1.251 +//         int matchByte = outStream[nowPos - rep0];
   1.252 +
   1.253 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0];
   1.254 +	
   1.255 +//         do {
   1.256 +//           int bit;
   1.257 +//           CProb *probLit;
   1.258 +//           matchByte <<= 1; bit = (matchByte & 0x100);
   1.259 +
   1.260 +	movb	$1, %bh
   1.261 +lzd4:
   1.262 +	shlb	$1, %bl			// matchByte <<= 1
   1.263 +	sbb	DI, DI			// save bit=C (di = 0 or -1, C is kept)
   1.264 +
   1.265 +//           probLit = prob + 0x100 + bit + symbol;
   1.266 +
   1.267 +	mov	CX, AX			// restore prob
   1.268 +	adcb	%bh, %ah		// + bit + 0x100 (adds nothing when bx = 0)
   1.269 +	
   1.270 +//           RC_GET_BIT2(probLit, symbol, if (bit) break, if (!bit) break)
   1.271 +
   1.272 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
   1.273 +	rclb	$1, %dl			// symbol <<= 1; symbol |= C
   1.274 +	jc	lzd5			// if symbol >= 0x100
   1.275 +	cmp	DI, AX
   1.276 +	jz	lzd4			// if bit == Bit1(prob+%ax)
   1.277 +
   1.278 +//         } while (symbol < 0x100);
   1.279 +//       }
   1.280 +lzd3:
   1.281 +//       while (symbol < 0x100) {
   1.282 +//         CProb *probLit = prob + symbol;
   1.283 +//         RC_GET_BIT(probLit, symbol)
   1.284 +//       }
   1.285 +
   1.286 +	xor	BX, BX			// bx = 0 turns the lzd4 loop into the plain bit-tree decode
   1.287 +	jmp	lzd4
   1.288 +lzd5:
   1.289 +
   1.290 +//       outStream[nowPos++] = previousByte = (Byte)symbol;
   1.291 +
   1.292 +	xchg	AX, DX		// al = symbol
   1.293 +	call	outchar		// %bl = outStream[nowPos++] = %al;
   1.294 +	jmp	lzdmainlp
   1.295 +
   1.296 +//     }
   1.297 +
   1.298 +lzdstring:
   1.299 +	mov	$1, CX		// cx = index of the rep slot to use, 1 by default
   1.300 +
   1.301 +//     else { /* string */
   1.302 +//       prob = p + IsRep /*192*/ + state;
   1.303 +
   1.304 +	movb	$192, %dl
   1.305 +	addb	(BP, DI), %dl	// di = $state on entry
   1.306 +	mov	$rep0, DI
   1.307 +
   1.308 +//       if (Bit0(prob)) {
   1.309 +
   1.310 +	call	Bit1dx		// Bit1(prob)
   1.311 + 	jc	lzd8
   1.312 +
   1.313 +//         rep3 = rep2; rep2 = rep1; rep1 = rep0;
   1.314 +//         state = (state < kNumLitStates /*7*/) ? 0 : 3;
   1.315 +
   1.316 +	stc			// with cx = 1, lzd11a makes cx = 3: rotate rep0..rep3
   1.317 +
   1.318 +//         prob = p + LenCoder /*818*/;
   1.319 +
   1.320 +	mov	$818, DX
   1.321 +
   1.322 +//       }
   1.323 +
   1.324 +	jmp	lzd11a
   1.325 +
   1.326 +//       else {
   1.327 +lzd8:
   1.328 +//         prob += kNumStates /*12*/;
   1.329 +//         if (Bit0(prob)) {
   1.330 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.331 +	jc	lzd11
   1.332 +//           prob = p + IsRep0Long /*240*/ + (state << kNumPosBitsMax /*4*/) 
   1.333 +// 		   + posState;
   1.334 +	movb	$240, %dl	// dh=0
   1.335 +
   1.336 +//           if (Bit0(prob)) {
   1.337 +
   1.338 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.339 +	jc	lzd12
   1.340 +
   1.341 +//             // if (nowPos == 0) return LZMA_RESULT_DATA_ERROR;
   1.342 +//             state = (state < kNumLitStates /*7*/) ? 9 : 11;
   1.343 +
   1.344 +	movb	$9, %dl
   1.345 +
   1.346 +//             len++; goto string;
   1.347 +	jmp	lzd13string	// ax = 0
   1.348 +//           }
   1.349 +//         }
   1.350 +//         else {
   1.351 +lzd11:
   1.352 +//           UInt32 distance = rep1;
   1.353 +//           prob += kNumStates /*12*/;
   1.354 +//           if (!Bit0(prob)) {
   1.355 +
   1.356 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.357 +	jnc	lzd11z
   1.358 +
   1.359 +//             prob += kNumStates /*12*/;
   1.360 +//             if (Bit0(prob)) distance = rep2;
   1.361 +
   1.362 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.363 +lzd11a:
   1.364 +	adcb	%cl, %cl	// cx = 2 * cx + C
   1.365 +	
   1.366 +//             else { distance = rep3; rep3 = rep2; }
   1.367 +//             rep2 = rep1;
   1.368 +//           }
   1.369 +//           rep1 = rep0; rep0 = distance;
   1.370 +
   1.371 +lzd11z:
   1.372 +# ifdef ONLY8086
   1.373 +	shl	$1, CX
   1.374 +	shl	$1, CX		// 8->32 bits
   1.375 +	sub	CX, DI		// &rep[cx]
   1.376 +	movw	(BP, DI), %ax
   1.377 +	pushw	2(BP, DI)
   1.378 +rotreplp:
   1.379 +	movb	4(BP, DI), %bl
   1.380 +	movb	%bl, (BP, DI)
   1.381 +	inc	DI
   1.382 +	loop	rotreplp
   1.383 +	popw	%bx
   1.384 +	testb	%dh, %dh
   1.385 +	jnz	lzd10
   1.386 +	movw	%ax, (BP, DI)
   1.387 +	movw	%bx, 2(BP, DI)
   1.388 +# else
   1.389 +	shl	$2, CX		// 8->32 bits
   1.390 +	sub	CX, DI		// &rep[cx]
   1.391 +	movl	(BP, DI), %eax	// eax = selected distance
   1.392 +rotreplp:
   1.393 +	movb	4(BP, DI), %bl	// byte-wise copy: each older rep takes the next newer one
   1.394 +	movb	%bl, (BP, DI)
   1.395 +	inc	DI
   1.396 +	loop	rotreplp	// leaves di = $rep0 and cx = 0
   1.397 +	testb	%dh, %dh	// dh != 0 only for dx = 818 (new match): rep0 is decoded later
   1.398 +	jnz	lzd10
   1.399 +	movl	%eax, (BP, DI)	// rep0 = distance
   1.400 +# endif
   1.401 +
   1.402 +//         }
   1.403 +lzd12:
   1.404 +//         state = (state < kNumLitStates /*7*/) ? 8 : 11;
   1.405 +
   1.406 +	movb	$0x08, %cl	// cx = 8: state base for a rep, pushed at lzd10
   1.407 +
   1.408 +//         prob = p + RepLenCoder /*1332*/;
   1.409 +
   1.410 +	mov	$1332, DX
   1.411 +
   1.412 +//       }
   1.413 +lzd10:
   1.414 +	push	CX		// cx = 0 (new match) or 8 (rep): state base, popped into dx before lzd13string
   1.415 +
   1.416 +//       { /* get len */
   1.417 +//         int numBits, offset;
   1.418 +//         CProb *probLen = prob + LenChoice /*0*/;
   1.419 +//         numBits = kLenNumLowBits /*3*/;
   1.420 +
   1.421 +	movb	$8, %cl		// numBits : 3,3,8
   1.422 +
   1.423 +//         if (Bit0(probLen)) {
   1.424 +
   1.425 +	call	Bit1dx		// Bit1(prob)
   1.426 +	xchg	AX, BX
   1.427 +	inc	DX		// inc leaves C untouched
   1.428 +	jnc	lzd15		// bx=0
   1.429 +
   1.430 +//           probLen = prob + LenLow/*2*/ + (posState << kLenNumLowBits/*3*/);
   1.431 +//           offset = 0;
   1.432 +//         }
   1.433 +//         else {
   1.434 +//           probLen = prob + LenChoice2 /*1*/;
   1.435 +
   1.436 +	call	Bit1dx		// Bit1(prob)
   1.437 +	add	AX, BX		// bx = -1 or -2; C set only for -2
   1.438 +
   1.439 +#if PROP_PB != 0
   1.440 +	inc	AX		// ah=0
   1.441 +#endif
   1.442 +	jc	lzd16		// %ax=0, %bx=-2 
   1.443 +lzd15:
   1.444 +#if PROP_PB != 0
   1.445 +	movb	$8, %al
   1.446 +	mulb	posState(BP)
   1.447 +#endif
   1.448 +
   1.449 +//           if (Bit0(probLen)) {
   1.450 +//             probLen = prob + LenMid/*130*/ + (posState << kLenNumMidBits/*3*/);
   1.451 +
   1.452 +	movb	$3, %cl		// numBits : 3,3,8
   1.453 +lzd16:
   1.454 +#if PROP_PB != 0
   1.455 +	add	$2-128-1, AX	// probLen : 2,130,258
   1.456 +#else
   1.457 +	mov	$2-128-1, AX	// probLen : 2,130,258
   1.458 +#endif
   1.459 +	add	DX, AX
   1.460 +	mov	$-8+1, DX	// offset  : 0,8,16
   1.461 +lzdargslp:
   1.462 +	add	$8, DX
   1.463 +	add	$128, AX
   1.464 +	inc	BX
   1.465 +	jle	lzdargslp	// leave with bx=1
   1.466 +
   1.467 +//             offset = kLenNumLowSymbols /*8*/;
   1.468 +//             //numBits = kLenNumMidBits /*3*/;
   1.469 +//           }
   1.470 +//           else {
   1.471 +//             probLen = prob + LenHigh /*258*/;
   1.472 +//             offset = kLenNumLowSymbols /*8*/ + kLenNumMidSymbols /*8*/;
   1.473 +//             numBits = kLenNumHighBits /*8*/;
   1.474 +//           }
   1.475 +//         }
   1.476 +//         RangeDecoderBitTreeDecode(probLen, numBits, len); len += offset;
   1.477 +
   1.478 +	push	DX
   1.479 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.480 +	pop	DX
   1.481 +	add	DX, AX		// offset (dx holds offset + 1, so ax = len + 1)
   1.482 +	pop	DX		// 0 (new match) or 8 (rep): state base pushed at lzd10
   1.483 +lzd13string:
   1.484 +	push	AX
   1.485 +
   1.486 +// state = (state < kNumLitStates /*7*/) ? dl : dl|3;
   1.487 +
   1.488 +	movb	$7, %cl
   1.489 +	cmpb	%cl, state(BP)
   1.490 +	jb	new_state
   1.491 +	orb	$3, %dl
   1.492 +new_state:
   1.493 +	movb	%dl, state(BP)
   1.494 +
   1.495 +//       } /* get len */
   1.496 +//       if (state < 4) {
   1.497 +
   1.498 +	cmpb	$4-1, %dl	// dl <= 3 only for a new match
   1.499 +	ja	lzd19
   1.500 +
   1.501 +//         int posSlot;
   1.502 +//         state += kNumLitStates /*7*/;
   1.503 +
   1.504 +	addb	%cl, state(BP)
   1.505 +
   1.506 +//         prob = p + PosSlot /*432*/ + (((len < kNumLenToPosStates /*4*/) ? 
   1.507 +// 		len : kNumLenToPosStates - 1) << kNumPosSlotBits /*6*/);
   1.508 +
   1.509 +	cmp	$4+1, AX	// ax = len + 1 here, hence the +1 and the -64 below
   1.510 +	jb	lzd21
   1.511 +	mov	$3+1, AX
   1.512 +
   1.513 +lzd21:
   1.514 +
   1.515 +	dec	CX		// cx = 6
   1.516 +	shl	%cl, AX
   1.517 +	add	$432-64, AX
   1.518 +
   1.519 +//         RangeDecoderBitTreeDecode(prob, kNumPosSlotBits /*6*/, posSlot);
   1.520 +
   1.521 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.522 +
   1.523 +//         if (posSlot >= kStartPosModelIndex /*4*/) {
   1.524 +//           int numDirectBits = ((posSlot >> 1) - 1);
   1.525 +
   1.526 +#ifndef FLAT32
   1.527 +	movw	%cx, 2(%bp, %di)	// %cx = 0
   1.528 +#endif
   1.529 +	mov	AX, (BP, DI)		// rep0 = posSlot
   1.530 +	mov	AX, CX
   1.531 +	shrw	$1, CX
   1.532 +	dec	CX			// cx = numDirectBits
   1.533 +	cmpb	$4, %al
   1.534 +	jb	lzd22
   1.535 +
   1.536 +//           rep0 = (2 | ((UInt32)posSlot & 1));
   1.537 +
   1.538 +	andb	%bl, (BP, DI)		// %bx=1
   1.539 +	orb	$2, (BP, DI)
   1.540 +
   1.541 +//           if (posSlot < kEndPosModelIndex /*14*/) {
   1.542 +
   1.543 +	cmpb	$14, %al
   1.544 +	jnb	lzd23
   1.545 +
   1.546 +//             rep0 <<= numDirectBits;
   1.547 +
   1.548 +	neg	AX			// ax = -posSlot
   1.549 +# ifdef ONLY8086
   1.550 +	pushw	%cx
   1.551 +	movb	$0, %ch
   1.552 +shllrep0:
   1.553 +	shlw	$1, (BP, DI)
   1.554 +	rclw	$1, 2(BP, DI)
   1.555 +	loop	shllrep0
   1.556 +	popw	%cx
   1.557 +# else
   1.558 +	shll	%cl, (BP, DI)
   1.559 +# endif
   1.560 +	add	(BP, DI), AX		// ax = rep0 - posSlot
   1.561 +
   1.562 +//             prob = p + SpecPos /*688*/ + rep0 - posSlot - 1;
   1.563 +
   1.564 +	add	$687, AX
   1.565 +	jmp	lzd24
   1.566 +
   1.567 +//           }
   1.568 +//           else {
   1.569 +lzd23:
   1.570 +//             numDirectBits -= kNumAlignBits /*4*/;
   1.571 +//             do {
   1.572 +//               RC_NORMALIZE; Range >>= 1; rep0 <<= 1;
   1.573 +//               if (Code >= Range) { Code -= Range; rep0 |= 1; }
   1.574 +
   1.575 +lzd23z:
   1.576 +	call	RC_NORMALIZE
   1.577 +# ifdef ONLY8086
   1.578 +	pushw	%dx
   1.579 +	shrw	$1, Range+2(BP)
   1.580 +	rcrw	$1, Range(BP)
   1.581 +	movw	Range(BP), %ax
   1.582 +	movw	Range+2(BP), %dx
   1.583 +	cmpw	Code+2(BP), %dx
   1.584 +	ja	lzd25
   1.585 +	jb	lzd25x
   1.586 +	cmpw	Code(BP), %ax
   1.587 +	ja	lzd25
   1.588 +lzd25x:
   1.589 +	subw	%ax, Code(BP)
   1.590 +	sbbw	%dx, Code+2(BP)
   1.591 +	stc
   1.592 +lzd25:
   1.593 +	popw	%dx
   1.594 +	rclw	$1, (BP, DI)
   1.595 +	rclw	$1, 2(BP, DI)
   1.596 +# else
   1.597 +	shrl	$1, Range(BP)
   1.598 +	movl	Range(BP), %eax
   1.599 +	cmpl	Code(BP), %eax
   1.600 +	ja	lzd25			// Range > Code: C = 0, shift a 0 into rep0
   1.601 +	subl	%eax, Code(BP)
   1.602 +	stc				// Code >= Range: shift a 1 into rep0
   1.603 +lzd25:
   1.604 +	rcll	$1, (BP, DI)
   1.605 +# endif
   1.606 +
   1.607 +//             } while (--numDirectBits != 0);
   1.608 +
   1.609 +	cmpb	$4+1, %cl		// stop once only the 4 align bits remain
   1.610 +	loopne	lzd23z			// exits with cx = 4
   1.611 +
   1.612 +//             prob = p + Align /* 802 */; numDirectBits = kNumAlignBits /*4*/;
   1.613 +//             rep0 <<= numDirectBits;
   1.614 +
   1.615 +# ifdef ONLY8086
   1.616 +	pushw	%cx
   1.617 +	movb	$0, %ch
   1.618 +shlrep0:
   1.619 +	shlw	$1, (BP, DI)
   1.620 +	rclw	$1, 2(BP, DI)
   1.621 +	loop	shlrep0
   1.622 +	popw	%cx
   1.623 +# else
   1.624 +	shll	%cl, (BP, DI)
   1.625 +# endif
   1.626 +	mov	$802, AX
   1.627 +//           }
   1.628 +
   1.629 +lzd24:
   1.630 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.631 +
   1.632 +//           {
   1.633 +//             int i = 1, mi = 1;
   1.634 +//             do {
   1.635 +//               CProb *prob3 = prob + mi;
   1.636 +//               RC_GET_BIT2(prob3, mi, ; , rep0 |= i);
   1.637 +
   1.638 +	orb	%dh, (BP, DI)	// update rep0 with DirectBits
   1.639 +
   1.640 +//               i <<= 1;
   1.641 +//             } while(--numDirectBits != 0);
   1.642 +//           }
   1.643 +//         } else rep0 = posSlot;
   1.644 +lzd22:
   1.645 +//         if (++rep0 == (UInt32)(0)) break; /* EOF */
   1.646 +
   1.647 +# ifdef ONLY8086
   1.648 +	incw	(BP, DI)
   1.649 +	jnz	lzd19
   1.650 +	incw	2(BP, DI)
   1.651 +# else
   1.652 +	incl	(BP, DI)
   1.653 +# endif
   1.654 +
   1.655 +lzd19:
   1.656 +	pop	CX		// cx = value pushed at lzd13string; pop keeps the flags
   1.657 +	jz	lzdone		// Z only when ++rep0 wrapped to 0 (end marker)
   1.658 +
   1.659 +//       }
   1.660 +//       len += kMatchMinLen;/*2*/
   1.661 +
   1.662 +	inc	CX
   1.663 +
   1.664 +//     string: // if (rep0 > nowPos) return LZMA_RESULT_DATA_ERROR;
   1.665 +//       do {
   1.666 +lzd13z:
   1.667 +//         previousByte = outStream[nowPos - rep0];
   1.668 +//         outStream[nowPos++] = previousByte;
   1.669 +
   1.670 +	call	outcharDico 	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.671 +
   1.672 +//       } while(--len != 0);
   1.673 +
   1.674 +	loop	lzd13z
   1.675 +
   1.676 +//     } /* char/string */
   1.677 +//   }
   1.678 +
   1.679 +	jmp	lzdmainlp
   1.680 +
   1.681 +lzdone:
   1.682 +//   //RC_NORMALIZE;
   1.683 +//   //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
   1.684 +//   return LZMA_RESULT_OK;
   1.685 +	call	Dico2ESDI	// set es & di (rep0 = 0, so the value of C does not matter)
   1.686 +	lea	ws2(BP), SP	// dealloc: sp = bp + ws2, its value before the probabilities were pushed
   1.687 +	ret	
   1.688 +// }
   1.689 +
   1.690 +// bl = outStream[nowPos - rep0];
   1.691 +
   1.692 +/*
   1.693 + * output  es:di, bl
   1.694 + * scratch bh, cl, flags
   1.695 + */
   1.696 +
   1.697 +DicoRep02ESDI:
   1.698 +	stc			// C = 1: subtract rep0 in Dico2ESDI
   1.699 +
   1.700 +// bl = outStream[nowPos];
   1.701 +
   1.702 +/*
   1.703 + * input   C = 1 to address nowPos - rep0, C = 0 to address nowPos
   1.704 + * output  es:di, bl
   1.705 + */
   1.706 + 
   1.707 +Dico2ESDI:
   1.708 +#if !defined(FLAT32) && !defined(FLAT16OUT)
   1.709 +# ifdef ONLY8086
   1.710 +	pushw	%ax
   1.711 +	movw	nowPos(%bp), %bx
   1.712 +	movw	nowPos+2(%bp), %ax	// ax:bx = nowPos (mov keeps C)
   1.713 +	jnc	Dico2ESDIz
   1.714 +	subw	rep0(%bp), %bx
   1.715 +	sbbw	rep0+2(%bp), %ax
   1.716 +Dico2ESDIz:
   1.717 +	movw	$0xF, %di
   1.718 +	andw	%bx, %di		// di = low 4 bits of the address
   1.719 +	pushw	%cx
   1.720 +	movb	$4, %cl
   1.721 +	shrw	%cl, %bx
   1.722 +	shlw	%cl, %ax
   1.723 +	popw	%cx
   1.724 +	addb	%al, %bh		// bx = low 16 bits of (address >> 4)
   1.725 +	popw	%ax
   1.726 +# else
   1.727 +	movl	nowPos(%bp), %ebx
   1.728 +	jnc	Dico2ESDIz
   1.729 +	subl	rep0(%bp), %ebx
   1.730 +Dico2ESDIz:
   1.731 +	movw	%bx, %di		// di = low 16 bits of the address
   1.732 +	xorw	%bx, %bx
   1.733 +	shrl	$4, %ebx		// bx = (address >> 16) << 12
   1.734 +# endif
   1.735 +	movw	%bx, %es
   1.736 +#else
   1.737 +	mov	nowPos(BP), DI
   1.738 +	jnc	Dico2ESDIz
   1.739 +	sub	rep0(BP), DI
   1.740 +Dico2ESDIz:
   1.741 +#endif
   1.742 +#ifdef FLAT32
   1.743 +	movb	(DI), %bl
   1.744 +#else
   1.745 +	movb	%es:(%di), %bl
   1.746 +#endif
   1.747 +	ret
   1.748 +
   1.749 +outcharDico:
   1.750 +
   1.751 +// bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.752 +
   1.753 +/*
   1.754 + * output  es:di, bl
   1.755 + * update  nowPos
   1.756 + * scratch ax, dx, bh, cl, flags
   1.757 + */
   1.758 +
   1.759 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0]
   1.760 +	xchg	AX, BX		// al = byte to copy
   1.761 +outchar:
   1.762 +
   1.763 +// bl = outStream[nowPos++] = previousByte = al;
   1.764 +
   1.765 +/*
   1.766 + * output  bl
   1.767 + * update  nowPos
   1.768 + * scratch ax, dx, bh, di, cl, flags
   1.769 + */
   1.770 +
   1.771 +	clc			// C = 0: address nowPos itself
   1.772 +	call	Dico2ESDI
   1.773 +	stosb
   1.774 +	xchg	AX, BX		// previous byte
   1.775 +
   1.776 +//	int posState = (int)((nowPos) & posStateMask);
   1.777 +
   1.778 +#if PROP_PB != 0 && PROP_LP != 0
   1.779 +	addw	$0x0101, posState2(BP)	// bump posState2 (low byte) and posState (high byte) together
   1.780 +	andw	$(((1 << PROP_PB) -1)<<8)+((1 << PROP_LP) -1), posState2(BP)	// word-wide mask; a byte AND would leave posState (high byte) unmasked
   1.781 +#else
   1.782 +# if PROP_PB != 0
   1.783 +	incb	posState(BP)
   1.784 +	andb	$((1 << PROP_PB) -1), posState(BP)
   1.785 +# endif
   1.786 +# if PROP_LP != 0
   1.787 +	incb	posState2(BP)
   1.788 +	andb	$((1 << PROP_LP) -1), posState2(BP)
   1.789 +# endif
   1.790 +#endif
   1.791 +#ifdef ONLY8086
   1.792 +	incw	nowPos(BP)
   1.793 +	jnz	incnowPosDone
   1.794 +	incw	nowPos+2(BP)	// carry into the high word
   1.795 +incnowPosDone:
   1.796 +#else
   1.797 +	incl	nowPos(BP)
   1.798 +#endif
   1.799 +	ret
   1.800 +
   1.801 +//  
   1.802 +// #define RC_NORMALIZE if (Range < kTopValue) 
   1.803 +//    { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
   1.804 +
   1.805 +/*
   1.806 + * update  Range, Code, ds:si
   1.807 + * scratch flags
   1.808 + */
   1.809 +
   1.810 +RC_NORMALIZE:
   1.811 +	cmpb	$0, Range+3(BP)		// top byte non-zero: Range >= 0x01000000, nothing to do
   1.812 +	jne	RC_NORMALIZE_1
   1.813 +RC_LOAD_BYTE:
   1.814 +	push	AX			// ax is preserved for the caller
   1.815 +#ifdef ONLY8086
   1.816 +	movw	Range+1(BP), %ax	// byte-wise Range <<= 8 and Code <<= 8
   1.817 +	movw	%ax, Range+2(BP)
   1.818 +	movw	Code+1(BP), %ax
   1.819 +	movw	%ax, Code+2(BP)
   1.820 +	xorw	%ax, %ax
   1.821 +	movb	Range(BP), %ah
   1.822 +	movw	%ax, Range(BP)
   1.823 +	movb	Code(BP), %ah
   1.824 +	movw	%ax, Code(BP)
   1.825 +#else
   1.826 +	shll	$8, Range(BP)
   1.827 +	shll	$8, Code(BP)
   1.828 +#endif
   1.829 +#if !defined(FLAT16) && !defined(FLAT32)
   1.830 +	testw	%si, %si
   1.831 +	jns	RC_READ_BYTE		// si < 0x8000: no segment adjustment
   1.832 +	movw	%ds, %ax
   1.833 +	incw	%ax
   1.834 +	movw	%ax, %ds
   1.835 +	addw	$-16, %si		// ds += 1 and si -= 16: same linear address
   1.836 +RC_READ_BYTE:
   1.837 +#endif
   1.838 +	lodsb
   1.839 +	movb	%al, Code(BP)		// new low byte of Code
   1.840 +	pop	AX
   1.841 +RC_NORMALIZE_1:
   1.842 +	ret
   1.843 +
   1.844 +// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState); posState is added only when PROP_PB != 0
   1.845 +
   1.846 +Bit1state:
   1.847 +	movb	$16, %al
   1.848 +	mulb	state(BP)		// ax = state * 16
   1.849 +# if PROP_PB != 0
   1.850 +	addb	posState(BP), %al
   1.851 +# endif
   1.852 +Bit1axdx:
   1.853 +	add	DX, AX			// ax = probability index, dx is left unchanged
   1.854 +	jmp	Bit1
   1.855 +
   1.856 +// prob += 12; Bit1(prob)
   1.857 +
   1.858 +Bit1dx12:
   1.859 +	add	$12, DX
   1.860 +Bit1dx:
   1.861 +	mov	DX, AX
   1.862 +
   1.863 +// static int Bit1(CProb *p)
   1.864 +
   1.865 +Bit1:
   1.866 +/*
   1.867 + * input   ax=p (index into the probability table at bp)
   1.868 + * output  C = decoded bit, ax = 0 or -1 (same bit)
   1.869 + * update  bound, Range, Code, ds:si
   1.870 + * scratch flags
   1.871 + */
   1.872 + 
   1.873 +// {
   1.874 +// 	RC_NORMALIZE;
   1.875 +
   1.876 +	call  RC_NORMALIZE		// may update Range, Code, %si; %ax is preserved
   1.877 +
   1.878 +#ifdef ONLY8086
   1.879 +	pushw	%ax
   1.880 +	pushw	%cx
   1.881 +	pushw	%dx
   1.882 +	pushw	%di
   1.883 +#else
   1.884 +	pushal
   1.885 +#endif
   1.886 +
   1.887 +	xchg	AX, DI			// di = probability index
   1.888 +	add	DI, DI			// short *
   1.889 +	
   1.890 +
   1.891 +// 	bound = (Range>>kNumBitModelTotalBits /*11*/) * *(p);
   1.892 +
   1.893 +#ifdef ONLY8086
   1.894 +	movw	Range(BP), %dx
   1.895 +	movw	Range+2(BP), %ax
   1.896 +	movw	$11, %cx
   1.897 +shr11lp:
   1.898 +	shrw	$1, %ax
   1.899 +	rcrw	$1, %dx
   1.900 +	loop	shr11lp			// ax:dx = Range >> 11
   1.901 +	movw	%dx, %cx		// cx = low word
   1.902 +	mulw	(BP, DI)		// ax = low word of (high word * *p)
   1.903 +	xchgw	%ax, %cx
   1.904 +	mulw	(BP, DI)		// dx:ax = low word * *p
   1.905 +	addw	%cx, %dx		// dx:ax = bound
   1.906 +#else
   1.907 +	movl	Range(BP), %eax
   1.908 +	shrl	$11, %eax
   1.909 +	movzwl	(BP, DI), %edx
   1.910 +	mull	%edx			// eax = bound
   1.911 +#endif
   1.912 +
   1.913 +// 	if (Code < bound) {
   1.914 +
   1.915 +#ifdef ONLY8086
   1.916 +	cmpw	Code+2(BP), %dx
   1.917 +	jb	Bit1_1
   1.918 +	ja	Bit1_1x
   1.919 +	cmpw	Code(BP), %ax
   1.920 +	jbe	Bit1_1
   1.921 +Bit1_1x:
   1.922 +
   1.923 +//    		Range = bound;
   1.924 +
   1.925 +	movw	%ax, Range(BP)
   1.926 +	movw	%dx, Range+2(BP)
   1.927 +#else
   1.928 +	cmpl	Code(BP), %eax
   1.929 +	jbe	Bit1_1			// bound <= Code: bit is 1
   1.930 +
   1.931 +//    		Range = bound;
   1.932 +
   1.933 +	movl	%eax, Range(BP)
   1.934 +#endif
   1.935 +
   1.936 +// 		*(p) += (kBitModelTotal /*2048*/ - *(p)) >> kNumMoveBits /*5*/;
   1.937 +
   1.938 +	movw	$2048, %ax		// C is still 0 here from the compare
   1.939 +
   1.940 +// 		return 0;
   1.941 +
   1.942 +	jmp	Bit1_2
   1.943 +
   1.944 +//	}
   1.945 +// 	else {
   1.946 +
   1.947 +Bit1_1:
   1.948 +
   1.949 +//    		Range -= bound; Code -= bound;
   1.950 +
   1.951 +#ifdef ONLY8086
   1.952 +	subw	%ax, Range(BP)
   1.953 +	sbbw	%dx, Range+2(BP)
   1.954 +	subw	%ax, Code(BP)
   1.955 +	sbbw	%dx, Code+2(BP)
   1.956 +#else
   1.957 +	subl	%eax, Range(BP)
   1.958 +	subl	%eax, Code(BP)
   1.959 +#endif
   1.960 +
   1.961 +// 		*(p) -= (*(p)) >> kNumMoveBits /*5*/;
   1.962 +
   1.963 +	movw	$31, %ax		// (31 - *p) >> 5 (arithmetic) equals -(*p >> 5)
   1.964 +
   1.965 +// 		return 1;
   1.966 +
   1.967 +	stc
   1.968 +Bit1_2:
   1.969 +	pushf				// keep C (the decoded bit) across the update
   1.970 +	subw	(BP, DI), %ax
   1.971 +#ifdef ONLY8086
   1.972 +	movb	$5, %cl
   1.973 +	sarw	%cl, %ax
   1.974 +#else
   1.975 +	sarw	$5, %ax
   1.976 +#endif
   1.977 +	addw	%ax, (BP, DI)		// shared update for both branches
   1.978 +	popf
   1.979 +#ifdef ONLY8086
   1.980 +	popw	%di
   1.981 +	popw	%dx
   1.982 +	popw	%cx
   1.983 +	popw	%ax
   1.984 +#else
   1.985 +	popal
   1.986 +#endif
   1.987 +	sbb	AX, AX			// ax = 0 or -1 from C; C itself is unchanged
   1.988 +
   1.989 +// 	}
   1.990 +// }
   1.991 +
   1.992 +	ret
   1.993 +
   1.994 +RangeDecoder:
   1.995 +
   1.996 +/*
   1.997 + * input   ax=probs cx=numLevels (<= 8) bx=1
   1.998 + * output  ax=res (first decoded bit is the MSB), dh (first decoded bit is the LSB)
   1.999 + * update  bound, Range, Code, ds:si
  1.1000 + * scratch flags, cx=0, dl
  1.1001 + */
  1.1002 + 
  1.1003 +	push	BX
  1.1004 +	
  1.1005 +//   { int i = numLevels; res = 1; 
  1.1006 +	mov	BX, DX		// res = 1
  1.1007 +	
  1.1008 +//   do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); 
  1.1009 +
  1.1010 +RangeDecoder_1:
  1.1011 +	push	AX
  1.1012 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
  1.1013 +	rclb	$1, %dl			// res <<= 1; res |= C
  1.1014 +	andb	%bl, %al		// current bit
  1.1015 +	orb	%al, %bh		// store in bh
  1.1016 +	shlb	$1, %bl			// update max
  1.1017 +	pop	AX
  1.1018 +	loop	RangeDecoder_1
  1.1019 +
  1.1020 +//   res -= (1 << numLevels); }
  1.1021 +
  1.1022 +	xchg	AX, BX			// move bh to dh
  1.1023 +	xchg	AX, DX			// and dl to al
  1.1024 +	sub	%dl, %al		// sub max (for 8 levels both wrapped modulo 256, the result is still res)
  1.1025 +	pop	BX
  1.1026 +	ret