wok-next diff memtest/stuff/unlzma.S @ rev 13287

memtest: lzma compression
author Pascal Bellard <pascal.bellard@slitaz.org>
date Sun Aug 26 00:21:22 2012 +0200 (2012-08-26)
parents
children e76c37f7bfe6
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/memtest/stuff/unlzma.S	Sun Aug 26 00:21:22 2012 +0200
     1.3 @@ -0,0 +1,822 @@
     1.4 +// #define RC_NORMALIZE if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
     1.5 +//
     1.6 +// #define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
     1.7 +// #define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
     1.8 +// #define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
     1.9 +//
    1.10 +//#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
    1.11 +//  { UpdateBit0(p); mi <<= 1; A0; } else \
    1.12 +//  { UpdateBit1(p); mi = (mi + mi) + 1; A1; }
    1.13 +//
    1.14 +// #define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)
    1.15 +//
    1.16 +// #define RangeDecoderBitTreeDecode(probs, numLevels, res) \
    1.17 +//  { int i = numLevels; res = 1; \
    1.18 +//  do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
    1.19 +//  res -= (1 << numLevels); }
    1.20 +/*
    1.21 + * Compression with : lzma e src dst -eos -pb2 -lp0 -lc3 (must agree with PROP_PB/PROP_LP/PROP_LC)
    1.22 + */
    1.23 +// PROPS packs the three settings into one value: lc + 9*lp + 45*pb (not referenced in the code visible here).
    1.24 +#define PROP_PB 2
    1.25 +#define PROP_LP 0
    1.26 +#define PROP_LC 3
    1.27 +#define PROPS (PROP_LC+(PROP_LP*9)+(PROP_PB*45))
    1.28 +
    1.29 +// static const Byte *Buffer;
    1.30 +// static UInt32 bound, Code, Range;
    1.31 +
    1.32 +/*
    1.33 + * The input stream pointer lives in DS:SI.
    1.34 + * All variables are addressed relative to BP (ws=ss:bp).
    1.35 + */
    1.36 +// Locals sit at negative offsets from BP; the probability table starts at BP+0.
    1.37 +rep0		=	-4		// long at bp-4, distance used by the match copy
    1.38 +rep1		=	rep0-4		// long at bp-8
    1.39 +rep2		=	rep0-8		// long at bp-12
    1.40 +rep3		=	rep0-12		// long at bp-16
    1.41 +state		=	-17		// byte at bp-17, 0..11
    1.42 +posState 	=	state-1		// byte at bp-18, 0..15 (maintained only when PROP_PB != 0)
    1.43 +posState2 	=	posState-1	// byte at bp-19, 0..15 (maintained only when PROP_LP != 0)
    1.44 +scratched	=	rep0-16		// byte at bp-20, = 1 (not referenced in the code visible here)
    1.45 +Code		=	-24		// long at bp-24, range decoder code value
    1.46 +outStream	=	-28		// long at bp-28, output position (linear address in the 16-bit build)
    1.47 +nowPos		=	outStream	// long, alias of outStream
    1.48 +Range		=	Code-8		// long at bp-32, range decoder range
    1.49 +#define LOCALS		32
    1.50 +
    1.51 +// int LzmaDecode(CLzmaDecoderState *vs,
    1.52 +//     const unsigned char *inStream, 
    1.53 +//     unsigned char *outStream)
    1.54 +// {
    1.55 +//   CProb *p = vs->Probs;
    1.56 +//   SizeT nowPos = 0;
    1.57 +//   #define posStateMask = (1 << (vs->Properties.pb)) - 1;
    1.58 +//   #define literalPosMask = (1 << (vs->Properties.lp)) - 1;
    1.59 +//   int lc = vs->Properties.lc, state = 0, len = 0;
    1.60 +//   UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
    1.61 +// 
    1.62 +//   {
    1.63 +//     UInt32 i, numProbs = Literal /*1846*/
    1.64 +// 	    + ((UInt32)LZMA_LIT_SIZE /*768*/ << (lc + vs->Properties.lp));
    1.65 +//     for (i = 0; i < numProbs; i++) p[i] = kBitModelTotal /*2048*/ >> 1;
    1.66 +
    1.67 +#define WS (1846+(768<<(PROP_LC+PROP_LP)))
    1.68 +#if (WS+WS+LOCALS) >= 65000
    1.69 +/* The table (2 bytes per entry) plus locals must fit below 65000 bytes. MAX WS = (1846+(768<<(8+4))) > 3MB! */
    1.70 +#error invalid (lc,lp,pb) : out of memory
    1.71 +#endif
    1.72 +// ws1: probability entries, ws2: their size in bytes, ws: ws2 + LOCALS + 15 (not referenced in the code visible here).
    1.73 +ws1	=	WS
    1.74 +ws2	=	ws1*2
    1.75 +ws	=	ws2+LOCALS+15
    1.76 +// Register-name macros: 16-bit registers by default, 32-bit registers when FLAT32 is defined.
    1.77 +#ifndef FLAT32
    1.78 +#define	AX	%ax
    1.79 +#define	BX	%bx
    1.80 +#define	CX	%cx
    1.81 +#define	DX	%dx
    1.82 +#define	SI	%si
    1.83 +#define	DI	%di
    1.84 +#define	BP	%bp
    1.85 +#define	SP	%sp
    1.86 +#define CWD	cwd
    1.87 +#else
    1.88 +#define	AX	%eax
    1.89 +#define	BX	%ebx
    1.90 +#define	CX	%ecx
    1.91 +#define	DX	%edx
    1.92 +#define	SI	%esi
    1.93 +#define	DI	%edi
    1.94 +#define	BP	%ebp
    1.95 +#define	SP	%esp
    1.96 +#define CWD	cdq
    1.97 +#endif
    1.98 +/*
    1.99 + * LzmaDecode:
   1.100 +#ifndef FLAT32
   1.101 + *   input   ds:si=inStream, es:di=outStream
   1.102 + *   output  outStream[], ds:si, es:di
   1.103 + 	.code 16
   1.104 +#else
   1.105 + *   input   esi=inStream, edi=outStream
   1.106 + *   output  outStream[], esi, edi
   1.107 + 	.code 32
   1.108 +#endif
   1.109 + */
   1.110 + 
   1.111 +	mov	$ws1, CX		// one stack word per probability entry
   1.112 +lzd1:
   1.113 +	pushw	$2048/2			// p[i] = kBitModelTotal >> 1
   1.114 +	loop	lzd1
   1.115 +	mov	SP, BP			// BP = base of the probability table
   1.116 +	movb	$((LOCALS+3)/4)*2, %cl	// CX = 0 after the loop above, so CX = 16 dwords
   1.117 +initlocals:
   1.118 +	pushl	$1			// every local dword = 1: rep0..rep3 = 1, state = 0
   1.119 +	loop	initlocals
   1.120 +
   1.121 +#ifndef FLAT32
   1.122 +	movb	$4, %cl
   1.123 +	movw	%es, %bx
   1.124 +	shrw	%cl, %bx		// bh = es >> 12: high bits of the linear address
   1.125 +	movw	%es, %dx
   1.126 +	shlw	%cl, %dx
   1.127 +	addw	%dx, %di		// low 16 bits of es*16 + di, carry kept for adcb
   1.128 +	movw	%di, outStream(%bp)
   1.129 +	adcb	%bh, outStream+2(%bp)	// this byte was 0 after the init above
   1.130 +	incw	%cx			// cx = 5 bytes to preload
   1.131 +#else
   1.132 +	movb	$5, %cl			// ecx = 5 bytes to preload
   1.133 +	movl	%edi, outStream(%ebp)
   1.134 +#endif
   1.135 +
   1.136 +//   Byte previousByte = 0;
   1.137 +	xor	BX, BX
   1.138 +
   1.139 +// #define RC_INIT(buffer) 
   1.140 +//    Buffer = buffer; Code = 0; Range = 0xFFFFFFFF; 
   1.141 +//    { int i; for(i=0; i<5; i++) { Code = (Code<<8) | RC_READ_BYTE; }}
   1.142 +//   }
   1.143 +//   RC_INIT(inStream);
   1.144 +
   1.145 +	add	$13, SI		// skip the 13-byte header
   1.146 +setrep:
   1.147 +	call	RC_LOAD_BYTE		// Range <<= 8; Code = (Code << 8) | next input byte
   1.148 +	decb	Range(BP)		// low byte 0x00 -> 0xFF; after 5 rounds Range = 0xFFFFFFFF
   1.149 +	loop	setrep
   1.150 +
   1.151 +lzdmainlp:
   1.152 +//   while(1) {
   1.153 +//     CProb *prob;
   1.154 +//     int posState = (int)((nowPos) & posStateMask);
   1.155 +// 
   1.156 +//     prob = p + IsMatch /*0*/ + (state << kNumPosBitsMax /*4*/) + posState;
   1.157 +//     if (Bit0(prob)) { /* char */
   1.158 +
   1.159 +	xor	DX, DX			// IsMatch base = 0
   1.160 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.161 +	mov	$state, DI		// DI = frame offset of state (mov keeps the carry)
   1.162 +	jc	lzdstring
   1.163 +
   1.164 +//       prob = p + Literal /*1846*/ + (LZMA_LIT_SIZE /*768*/ * 
   1.165 +// 	((((nowPos) & literalPosMask) << lc) + (previousByte >> (8 - lc))));
   1.166 +
   1.167 +#if PROP_LC != 0
   1.168 +	shrb	$8-PROP_LC, %bl
   1.169 +#endif
   1.170 +
   1.171 +#if PROP_LP != 0
   1.172 +	movb	posState2(BP), %dl
   1.173 +	shl	$PROP_LC, DX
   1.174 +	movb	$0, %bh
   1.175 +	add	BX, DX
   1.176 +#endif
   1.177 +
   1.178 +	movb	$3, %ah			// Bit1 returned ax = 0, so ax = 0x300 = 768
   1.179 +	mul	BX		// ax = 768 * bx
   1.180 +	add	$1846, AX
   1.181 +
   1.182 +//       int symbol = 1;
   1.183 +
   1.184 +	CWD				// clears DX (sign extension of AX)
   1.185 +	inc	DX		// symbol = 1
   1.186 +	xchg	AX, CX		// save prob
   1.187 +
   1.188 +//       if (state >= kNumLitStates /*7*/) { /* previous was string */
   1.189 +//       if (state < 4) state = 0;
   1.190 +
   1.191 +lzd6z:
   1.192 +	subb	$3, (BP, DI)		// state -= 3 (executed a second time when state was >= 10)
   1.193 +
   1.194 +//       if (state < 4) state = 0;
   1.195 +
   1.196 +	jnc	lzd6
   1.197 +	movb	%dh, (BP, DI)	// %dh = 0
   1.198 +
   1.199 +lzd6:
   1.200 +//       else if (state < 10) state -= 3;
   1.201 +
   1.202 +	cmpb	$10-3, (BP, DI)
   1.203 +	
   1.204 +//       else state -= 6;
   1.205 +
   1.206 +	jnb	lzd6z
   1.207 +	cmpb	$7-3-1, (BP, DI)	// new state <= 3 means the old state was < 7 (previous was a literal)
   1.208 +	jbe	lzd3
   1.209 +	
   1.210 +//         int matchByte = outStream[nowPos - rep0];
   1.211 +
   1.212 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0];
   1.213 +	
   1.214 +//         do {
   1.215 +//           int bit;
   1.216 +//           CProb *probLit;
   1.217 +//           matchByte <<= 1; bit = (matchByte & 0x100);
   1.218 +
   1.219 +	movb	$1, %bh			// bh = 1 selects the matched-literal tables (+0x100)
   1.220 +lzd4:
   1.221 +	shlb	$1, %bl			// matchByte <<= 1
   1.222 +	sbb	DI, DI			// save bit=C (DI = -C, carry unchanged)
   1.223 +
   1.224 +//           probLit = prob + 0x100 + bit + symbol;
   1.225 +
   1.226 +	mov	CX, AX			// restore prob
   1.227 +	adcb	%bh, %ah		// + bit + 0x100 (adds nothing once bx = 0)
   1.228 +	
   1.229 +//           RC_GET_BIT2(probLit, symbol, if (bit) break, if (!bit) break)
   1.230 +
   1.231 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
   1.232 +	rclb	$1, %dl			// symbol <<= 1; symbol |= C
   1.233 +	jc	lzd5			// if symbol >= 0x100
   1.234 +	cmp	DI, AX
   1.235 +	jz	lzd4			// if bit == Bit1(prob+%ax)
   1.236 +
   1.237 +//         } while (symbol < 0x100);
   1.238 +//       }
   1.239 +lzd3:
   1.240 +//       while (symbol < 0x100) {
   1.241 +//         CProb *probLit = prob + symbol;
   1.242 +//         RC_GET_BIT(probLit, symbol)
   1.243 +//       }
   1.244 +
   1.245 +	xor	BX, BX			// bx = 0: lzd4 now decodes plain literal bits
   1.246 +	jmp	lzd4
   1.247 +lzd5:
   1.248 +
   1.249 +//       outStream[nowPos++] = previousByte = (Byte)symbol;
   1.250 +
   1.251 +	xchg	AX, DX			// al = decoded symbol
   1.252 +	call	outchar		// %bl = outStream[nowPos++] = %al;
   1.253 +	jmp	lzdmainlp
   1.254 +
   1.255 +//     }
   1.256 +
   1.257 +lzdstring:
   1.258 +	mov	$1, CX			// CX = number of rep slots to rotate (adjusted below)
   1.259 +
   1.260 +//     else { /* string */
   1.261 +//       prob = p + IsRep /*192*/ + state;
   1.262 +
   1.263 +	movb	$192, %dl
   1.264 +	addb	(BP, DI), %dl		// DI still holds the offset of state
   1.265 +	mov	$rep0, DI
   1.266 +
   1.267 +//       if (Bit0(prob)) {
   1.268 +
   1.269 +	call	Bit1dx		// Bit1(prob)
   1.270 + 	jc	lzd8
   1.271 +
   1.272 +//         rep3 = rep2; rep2 = rep1; rep1 = rep0;
   1.273 +//         state = (state < kNumLitStates /*7*/) ? 0 : 3;
   1.274 +
   1.275 +	stc				// with cl = 1 the adcb at lzd11a yields cl = 3
   1.276 +
   1.277 +//         prob = p + LenCoder /*818*/;
   1.278 +
   1.279 +	mov	$818, DX		// dh != 0 marks the new-match path for the test after rotreplp
   1.280 +
   1.281 +//       }
   1.282 +
   1.283 +	jmp	lzd11a
   1.284 +
   1.285 +//       else {
   1.286 +lzd8:
   1.287 +//         prob += kNumStates /*12*/;
   1.288 +//         if (Bit0(prob)) {
   1.289 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.290 +	jc	lzd11
   1.291 +//           prob = p + IsRep0Long /*240*/ + (state << kNumPosBitsMax /*4*/) 
   1.292 +// 		   + posState;
   1.293 +	movb	$240, %dl	// dh=0
   1.294 +
   1.295 +//           if (Bit0(prob)) {
   1.296 +
   1.297 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.298 +	jc	lzd12
   1.299 +
   1.300 +//             // if (nowPos == 0) return LZMA_RESULT_DATA_ERROR;
   1.301 +//             state = (state < kNumLitStates /*7*/) ? 9 : 11;
   1.302 +
   1.303 +	movb	$9, %dl
   1.304 +
   1.305 +//             len++; goto string;
   1.306 +	jmp	lzd13string	// ax = 0
   1.307 +//           }
   1.308 +//         }
   1.309 +//         else {
   1.310 +lzd11:
   1.311 +//           UInt32 distance = rep1;
   1.312 +//           prob += kNumStates /*12*/;
   1.313 +//           if (!Bit0(prob)) {
   1.314 +
   1.315 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.316 +	jnc	lzd11z			// cx = 1: distance = rep1
   1.317 +
   1.318 +//             prob += kNumStates /*12*/;
   1.319 +//             if (Bit0(prob)) distance = rep2;
   1.320 +
   1.321 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.322 +lzd11a:
   1.323 +	adcb	%cl, %cl		// cl = 2 + C: 2 selects rep2, 3 selects rep3
   1.324 +	
   1.325 +//             else { distance = rep3; rep3 = rep2; }
   1.326 +//             rep2 = rep1;
   1.327 +//           }
   1.328 +//           rep1 = rep0; rep0 = distance;
   1.329 +
   1.330 +lzd11z:
   1.331 +	shl	$2, CX		// 8->32 bits
   1.332 +	sub	CX, DI		// &rep[cx]
   1.333 +	movl	(BP, DI), %eax		// eax = selected distance
   1.334 +rotreplp:
   1.335 +	movb	4(BP, DI), %bl		// byte-wise shift: rep[i] = rep[i-1]
   1.336 +	movb	%bl, (BP, DI)
   1.337 +	inc	DI
   1.338 +	loop	rotreplp		// leaves DI = rep0 and CX = 0
   1.339 +	testb	%dh, %dh
   1.340 +	jnz	lzd10			// new match (dx = 818): rep0 is decoded later
   1.341 +	movl	%eax, (BP, DI)		// rep0 = distance
   1.342 +
   1.343 +//         }
   1.344 +lzd12:
   1.345 +//         state = (state < kNumLitStates /*7*/) ? 8 : 11;
   1.346 +
   1.347 +	movb	$0x08, %cl
   1.348 +
   1.349 +//         prob = p + RepLenCoder /*1332*/;
   1.350 +
   1.351 +	mov	$1332, DX
   1.352 +
   1.353 +//       }
   1.354 +lzd10:
   1.355 +	push	CX		// CX = 0 (new match) or 8 (rep match): base value for the next state
   1.356 +
   1.357 +//       { /* get len */
   1.358 +//         int numBits, offset;
   1.359 +//         CProb *probLen = prob + LenChoice /*0*/;
   1.360 +//         numBits = kLenNumLowBits /*3*/;
   1.361 +
   1.362 +	movb	$8, %cl		// numBits : 3,3,8
   1.363 +
   1.364 +//         if (Bit0(probLen)) {
   1.365 +
   1.366 +	call	Bit1dx		// Bit1(prob)
   1.367 +	xchg	AX, BX			// bx = 0 or -1 (minus the decoded bit)
   1.368 +	inc	DX			// prob + LenChoice2; inc leaves the carry untouched
   1.369 +	jnc	lzd15		// bx=0
   1.370 +
   1.371 +//           probLen = prob + LenLow/*2*/ + (posState << kLenNumLowBits/*3*/);
   1.372 +//           offset = 0;
   1.373 +//         }
   1.374 +//         else {
   1.375 +//           probLen = prob + LenChoice2 /*1*/;
   1.376 +
   1.377 +	call	Bit1dx		// Bit1(prob)
   1.378 +	add	AX, BX			// bx = -1 or -2; carry set only when the second bit is 1
   1.379 +
   1.380 +#if PROP_PB != 0
   1.381 +	inc	AX		// ah=0
   1.382 +#endif
   1.383 +	jc	lzd16		// %ax=0, %bx=-2 
   1.384 +lzd15:
   1.385 +#if PROP_PB != 0
   1.386 +	movb	$8, %al
   1.387 +	mulb	posState(BP)		// ax = posState << 3
   1.388 +#endif
   1.389 +
   1.390 +//           if (Bit0(probLen)) {
   1.391 +//             probLen = prob + LenMid/*130*/ + (posState << kLenNumMidBits/*3*/);
   1.392 +
   1.393 +	movb	$3, %cl		// numBits : 3,3,8
   1.394 +lzd16:
   1.395 +#if PROP_PB != 0
   1.396 +	add	$2-128-1, AX	// probLen : 2,130,258
   1.397 +#else
   1.398 +	mov	$2-128-1, AX	// probLen : 2,130,258
   1.399 +#endif
   1.400 +	add	DX, AX			// DX is prob + 1 here, hence the -1 above
   1.401 +	mov	$-8+1, DX	// offset  : 0,8,16 (kept with a +1 bias)
   1.402 +lzdargslp:
   1.403 +	add	$8, DX
   1.404 +	add	$128, AX
   1.405 +	inc	BX			// runs 1, 2 or 3 times for bx = 0, -1, -2
   1.406 +	jle	lzdargslp	// leave with bx=1
   1.407 +
   1.408 +//             offset = kLenNumLowSymbols /*8*/;
   1.409 +//             //numBits = kLenNumMidBits /*3*/;
   1.410 +//           }
   1.411 +//           else {
   1.412 +//             probLen = prob + LenHigh /*258*/;
   1.413 +//             offset = kLenNumLowSymbols /*8*/ + kLenNumMidSymbols /*8*/;
   1.414 +//             numBits = kLenNumHighBits /*8*/;
   1.415 +//           }
   1.416 +//         }
   1.417 +//         RangeDecoderBitTreeDecode(probLen, numBits, len); len += offset;
   1.418 +
   1.419 +	push	DX
   1.420 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.421 +	pop	DX
   1.422 +	add	DX, AX		// offset (ax = len + 1 because of the bias)
   1.423 +	pop	DX		// 0 or 8: the value pushed at lzd10
   1.424 +lzd13string:
   1.425 +	push	AX			// saved length, popped into CX at lzd19
   1.426 +
   1.427 +// state = (state < kNumLitStates /*7*/) ? dl : dl|3;
   1.428 +
   1.429 +	movb	$7, %cl
   1.430 +	cmpb	%cl, state(BP)
   1.431 +	jb	new_state
   1.432 +	orb	$3, %dl
   1.433 +new_state:
   1.434 +	movb	%dl, state(BP)
   1.435 +
   1.436 +//       } /* get len */
   1.437 +//       if (state < 4) {
   1.438 +
   1.439 +	cmpb	$4-1, %dl
   1.440 +	ja	lzd19			// rep paths skip the distance decoding (ZF is clear here)
   1.441 +
   1.442 +//         int posSlot;
   1.443 +//         state += kNumLitStates /*7*/;
   1.444 +
   1.445 +	addb	%cl, state(BP)
   1.446 +
   1.447 +//         prob = p + PosSlot /*432*/ + (((len < kNumLenToPosStates /*4*/) ? 
   1.448 +// 		len : kNumLenToPosStates - 1) << kNumPosSlotBits /*6*/);
   1.449 +
   1.450 +	cmp	$4+1, AX		// ax holds len + 1, so compare against 4 + 1
   1.451 +	jb	lzd21
   1.452 +	mov	$3+1, AX
   1.453 +	//??movb	$3+1, %al
   1.454 +
   1.455 +lzd21:
   1.456 +
   1.457 +	dec	CX		// cx = 6
   1.458 +	shl	%cl, AX
   1.459 +	add	$432-64, AX		// -64 removes the +1 bias after the shift by 6
   1.460 +
   1.461 +//         RangeDecoderBitTreeDecode(prob, kNumPosSlotBits /*6*/, posSlot);
   1.462 +
   1.463 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.464 +
   1.465 +//         if (posSlot >= kStartPosModelIndex /*4*/) {
   1.466 +//           int numDirectBits = ((posSlot >> 1) - 1);
   1.467 +
   1.468 +#ifndef FLAT32
   1.469 +	movw	%cx, 2(%bp, %di)	// %cx = 0
   1.470 +#endif
   1.471 +	mov	AX, (BP, DI)		// rep0 = posSlot (DI = offset of rep0)
   1.472 +	mov	AX, CX
   1.473 +	shrw	$1, CX
   1.474 +	dec	CX			// cx = numDirectBits
   1.475 +	cmpb	$4, %al
   1.476 +	jb	lzd22
   1.477 +
   1.478 +//           rep0 = (2 | ((UInt32)posSlot & 1));
   1.479 +
   1.480 +	andb	%bl, (BP, DI)		// %bx=1
   1.481 +	orb	$2, (BP, DI)
   1.482 +
   1.483 +//           if (posSlot < kEndPosModelIndex /*14*/) {
   1.484 +
   1.485 +	cmpb	$14, %al
   1.486 +	jnb	lzd23
   1.487 +
   1.488 +//             rep0 <<= numDirectBits;
   1.489 +
   1.490 +	neg	AX			// ax = -posSlot
   1.491 +	shll	%cl, (BP, DI)
   1.492 +	add	(BP, DI), AX		// ax = rep0 - posSlot
   1.493 +
   1.494 +//             prob = p + SpecPos /*688*/ + rep0 - posSlot - 1;
   1.495 +
   1.496 +	add	$687, AX
   1.497 +	jmp	lzd24
   1.498 +
   1.499 +//           }
   1.500 +//           else {
   1.501 +lzd23:
   1.502 +//             numDirectBits -= kNumAlignBits /*4*/;
   1.503 +//             do {
   1.504 +//               RC_NORMALIZE; Range >>= 1; rep0 <<= 1;
   1.505 +//               if (Code >= Range) { Code -= Range; rep0 |= 1; }
   1.506 +
   1.507 +lzd23z:
   1.508 +	call	RC_NORMALIZE
   1.509 +	shrl	$1, Range(BP)
   1.510 +	movl	Range(BP), %eax
   1.511 +	cmpl	Code(BP), %eax
   1.512 +	ja	lzd25			// Range > Code: carry is clear, a 0 bit is shifted in
   1.513 +	subl	%eax, Code(BP)
   1.514 +	stc
   1.515 +lzd25:
   1.516 +	rcll	$1, (BP, DI)		// rep0 = (rep0 << 1) | C
   1.517 +
   1.518 +//             } while (--numDirectBits != 0);
   1.519 +
   1.520 +	cmp	$4+1, CX
   1.521 +	loopne	lzd23z			// stops with cx = 4 (kNumAlignBits) left
   1.522 +
   1.523 +//             prob = p + Align /* 802 */; numDirectBits = kNumAlignBits /*4*/;
   1.524 +//             rep0 <<= numDirectBits;
   1.525 +
   1.526 +	shll	%cl, (BP, DI)
   1.527 +	mov	$802, AX
   1.528 +//           }
   1.529 +
   1.530 +lzd24:
   1.531 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.532 +
   1.533 +//           {
   1.534 +//             int i = 1, mi = 1;
   1.535 +//             do {
   1.536 +//               CProb *prob3 = prob + mi;
   1.537 +//               RC_GET_BIT2(prob3, mi, ; , rep0 |= i);
   1.538 +
   1.539 +	orb	%dh, (BP, DI)	// update rep0 with DirectBits
   1.540 +
   1.541 +//               i <<= 1;
   1.542 +//             } while(--numDirectBits != 0);
   1.543 +//           }
   1.544 +//         } else rep0 = posSlot;
   1.545 +lzd22:
   1.546 +//         if (++rep0 == (UInt32)(0)) break; /* EOF */
   1.547 +
   1.548 +	incl	(BP, DI)		// ZF is set when rep0 wraps to 0; tested by the jz at lzd19
   1.549 +
   1.550 +lzd19:
   1.551 +	pop	CX			// length saved at lzd13string (pop keeps the flags)
   1.552 +	jz	lzdone			// taken only when the incl above wrapped rep0 to 0
   1.553 +
   1.554 +//       }
   1.555 +//       len += kMatchMinLen;/*2*/
   1.556 +
   1.557 +	inc	CX
   1.558 +
   1.559 +//     string: // if (rep0 > nowPos) return LZMA_RESULT_DATA_ERROR;
   1.560 +//       do {
   1.561 +lzd13z:
   1.562 +//         previousByte = outStream[nowPos - rep0];
   1.563 +//         outStream[nowPos++] = previousByte;
   1.564 +
   1.565 +	call	outcharDico 	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.566 +
   1.567 +//       } while(--len != 0);
   1.568 +
   1.569 +	loop	lzd13z
   1.570 +
   1.571 +//     } /* char/string */
   1.572 +//   }
   1.573 +
   1.574 +	jmp	lzdmainlp
   1.575 +
   1.576 +lzdone:
   1.577 +//   //RC_NORMALIZE;
   1.578 +//   //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
   1.579 +//   return LZMA_RESULT_OK;
   1.580 +	call	Dico2ESDI	// set es & di (rep0 = 0)
   1.581 +	lea	ws2(BP), SP	// dealloc: drop the locals and the probability table
   1.582 +	ret	
   1.583 +// }
   1.584 +
   1.585 +// bl = outStream[nowPos - rep0];
   1.586 +
   1.587 +/*
   1.588 + * output  es:di (edi when FLAT32), bl
   1.589 + * scratch bx (16-bit build), flags
   1.590 + */
   1.591 +
   1.592 +DicoRep02ESDI:
   1.593 +	stc				// carry set: subtract rep0 below
   1.594 +
   1.595 +// bl = outStream[nowPos];   (carry clear on entry)
   1.596 +
   1.597 +/*
   1.598 + * input   carry: set = address nowPos - rep0, clear = address nowPos
   1.599 + * output  es:di (edi when FLAT32), bl = byte at that address
   1.600 + */
   1.601 + 
   1.602 +Dico2ESDI:
   1.603 +#ifndef FLAT32
   1.604 +	movl	nowPos(%bp), %ebx	// linear output address (mov keeps the carry)
   1.605 +	jnc	Dico2ESDIz
   1.606 +	subl	rep0(%bp), %ebx
   1.607 +Dico2ESDIz:
   1.608 +	movw	%bx, %di		// di = low 16 bits of the linear address
   1.609 +	xorw	%bx, %bx
   1.610 +	shrl	$4, %ebx		// bx = high 16 bits << 12, so es*16 + di = linear address
   1.611 +	movw	%bx, %es
   1.612 +	movb	%es:(%di), %bl
   1.613 +#else
   1.614 +	movl	nowPos(%ebp), %edi	// 32-bit frame pointer, like every other FLAT32 access
   1.615 +	jnc	Dico2ESDIz
   1.616 +	subl	rep0(%ebp), %edi
   1.617 +Dico2ESDIz:
   1.618 +	movb	(%edi), %bl
   1.619 +#endif
   1.620 +	ret
   1.621 +
   1.622 +outcharDico:
   1.623 +
   1.624 +// bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.625 +
   1.626 +/*
   1.627 + * output  es:di, bl
   1.628 + * update  nowPos
   1.629 + * scratch ax, dx, bh, cl, flags
   1.630 + */
   1.631 +
   1.632 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0]
   1.633 +	xchg	AX, BX			// al = byte to emit, falls through into outchar
   1.634 +outchar:
   1.635 +
   1.636 +// bl = outStream[nowPos++] = previousByte = al;
   1.637 +
   1.638 +/*
   1.639 + * output  bl
   1.640 + * update  nowPos
   1.641 + * scratch ax, dx, bh, di, cl, flags
   1.642 + */
   1.643 +
   1.644 +	clc				// carry clear: Dico2ESDI addresses nowPos itself
   1.645 +	call	Dico2ESDI
   1.646 +	stosb				// store al at the output position
   1.647 +	xchg	AX, BX		// previous byte
   1.648 +
   1.649 +//	int posState = (int)((nowPos) & posStateMask);
   1.650 +
   1.651 +#if PROP_PB != 0 && PROP_LP != 0
   1.652 +	addw	$0x0101, posState2(BP)	// low byte = posState2, high byte = posState
   1.653 +	andw	$(((1 << PROP_PB) -1)<<8)+((1 << PROP_LP) -1), posState2(BP)	// word-sized mask needs andw
   1.654 +#else
   1.655 +# if PROP_PB != 0
   1.656 +	incb	posState(BP)
   1.657 +	andb	$((1 << PROP_PB) -1), posState(BP)
   1.658 +# endif
   1.659 +# if PROP_LP != 0
   1.660 +	incb	posState2(BP)
   1.661 +	andb	$((1 << PROP_LP) -1), posState2(BP)
   1.662 +# endif
   1.663 +#endif
   1.664 +	incl	nowPos(BP)
   1.665 +	ret
   1.666 +
   1.667 +//  
   1.668 +// #define RC_NORMALIZE if (Range < kTopValue) 
   1.669 +//    { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
   1.670 +
   1.671 +/*
   1.672 + * update  Range, Code, ds:si
   1.673 + * scratch flags (AX is saved and restored)
   1.674 + */
   1.675 +
   1.676 +RC_NORMALIZE:
   1.677 +	cmpb	$0, Range+3(BP)		// top byte non-zero means Range >= kTopValue
   1.678 +	jne	RC_NORMALIZE_1
   1.679 +RC_LOAD_BYTE:
   1.680 +	push	AX
   1.681 +	shll	$8, Range(BP)
   1.682 +	shll	$8, Code(BP)
   1.683 +#ifndef FLAT32
   1.684 +	testw	%si, %si
   1.685 +	jns	RC_READ_BYTE
   1.686 +	movw	%ds, %ax		// si >= 0x8000: advance ds by one paragraph and
   1.687 +	incw	%ax			// pull si back by 16, same linear address
   1.688 +	movw	%ax, %ds
   1.689 +	addw	$-16, %si
   1.690 +RC_READ_BYTE:
   1.691 +#endif
   1.692 +	lodsb
   1.693 +	movb	%al, Code(BP)		// new byte becomes the low byte of Code
   1.694 +	pop	AX
   1.695 +RC_NORMALIZE_1:
   1.696 +	ret
   1.697 +
   1.698 +// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.699 +
   1.700 +Bit1state:
   1.701 +	movb	$16, %al
   1.702 +	mulb	state(BP)		// ax = state << 4
   1.703 +# if PROP_PB != 0
   1.704 +	addb	posState(BP), %al
   1.705 +# endif
   1.706 +Bit1axdx:
   1.707 +	add	DX, AX			// entry point: Bit1(ax + dx)
   1.708 +	jmp	Bit1
   1.709 +
   1.710 +// prob += 12; Bit1(prob)
   1.711 +
   1.712 +Bit1dx12:
   1.713 +	add	$12, DX
   1.714 +Bit1dx:
   1.715 +	mov	DX, AX			// entry point: Bit1(dx)
   1.716 +
   1.717 +// static int Bit1(CProb *p)
   1.718 +
   1.719 +Bit1:
   1.720 +/*
   1.721 + * input   ax=p (index of the probability entry)
   1.722 + * output  C = decoded bit, ax = 0 or -1
   1.723 + * update  bound, Range, Code, ds:si
   1.724 + * scratch flags
   1.725 + */
   1.726 + 
   1.727 +// {
   1.728 +// 	RC_NORMALIZE;
   1.729 +
   1.730 +	call  RC_NORMALIZE		// keeps %ax, may update %si
   1.731 +
   1.732 +	pushal		// FIXME pushaw?
   1.733 +
   1.734 +	xchg	AX, DI
   1.735 +	add	DI, DI			// short *
   1.736 +	
   1.737 +
   1.738 +// 	bound = (Range>>kNumBitModelTotalBits /*11*/) * *(p);
   1.739 +
   1.740 +	movl	Range(BP), %eax
   1.741 +	shrl	$11, %eax
   1.742 +	movzwl	(BP, DI), %edx
   1.743 +	mull	%edx
   1.744 +
   1.745 +// 	if (Code < bound) {
   1.746 +
   1.747 +	cmpl	Code(BP), %eax
   1.748 +	jbe	Bit1_1			// bound <= Code: the bit is 1
   1.749 +
   1.750 +//    		Range = bound;
   1.751 +
   1.752 +	movl	%eax, Range(BP)
   1.753 +
   1.754 +// 		*(p) += (kBitModelTotal /*2048*/ - *(p)) >> kNumMoveBits /*5*/;
   1.755 +
   1.756 +	movw	$2048, %ax
   1.757 +
   1.758 +// 		return 0;
   1.759 +
   1.760 +	jmp	Bit1_2			// carry is clear here (the cmpl found bound > Code)
   1.761 +
   1.762 +//	}
   1.763 +// 	else {
   1.764 +
   1.765 +Bit1_1:
   1.766 +
   1.767 +//    		Range -= bound; Code -= bound;
   1.768 +
   1.769 +	subl	%eax, Range(BP)
   1.770 +	subl	%eax, Code(BP)
   1.771 +
   1.772 +// 		*(p) -= (*(p)) >> kNumMoveBits /*5*/;
   1.773 +
   1.774 +	movw	$31, %ax		// (31 - p) >> 5 (arithmetic) equals -(p >> 5)
   1.775 +
   1.776 +// 		return 1;
   1.777 +
   1.778 +	stc
   1.779 +Bit1_2:
   1.780 +	pushf				// keep the result bit across the arithmetic below
   1.781 +	subw	(BP, DI), %ax
   1.782 +	sarw	$5, %ax
   1.783 +	addw	%ax, (BP, DI)		// shared probability update for both branches
   1.784 +	popf
   1.785 +	popal		// FIXME popaw?
   1.786 +	sbb	AX, AX			// ax = -C, carry unchanged
   1.787 +
   1.788 +// 	}
   1.789 +// }
   1.790 +
   1.791 +	ret
   1.792 +
   1.793 +RangeDecoder:
   1.794 +
   1.795 +/*
   1.796 + * input   ax=probs cx=numLevels (up to 8, as used by the length decoder) bx=1
   1.797 + * output  ax=res (backward), dh (forward)
   1.798 + * update  bound, Range, Code, ds:si
   1.799 + * scratch flags, cx=0, dl
   1.800 + */
   1.801 + 
   1.802 +	push	BX
   1.803 +	
   1.804 +//   { int i = numLevels; res = 1; 
   1.805 +	mov	BX, DX		// res = 1
   1.806 +	
   1.807 +//   do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); 
   1.808 +
   1.809 +RangeDecoder_1:
   1.810 +	push	AX
   1.811 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
   1.812 +	rclb	$1, %dl			// res <<= 1; res |= C
   1.813 +	andb	%bl, %al		// current bit (al is 0x00 or 0xFF, bl is the bit mask)
   1.814 +	orb	%al, %bh		// store in bh
   1.815 +	shlb	$1, %bl			// update max
   1.816 +	pop	AX
   1.817 +	loop	RangeDecoder_1
   1.818 +
   1.819 +//   res -= (1 << numLevels); }
   1.820 +
   1.821 +	xchg	AX, BX			// move bh to dh
   1.822 +	xchg	AX, DX			// and dl to al
   1.823 +	sub	%dl, %al		// sub max (dl is 0 when numLevels = 8; the leading 1 already left al)
   1.824 +	pop	BX
   1.825 +	ret