wok-next diff memtest/stuff/unlzma.S @ rev 13287
memtest: lzma compression
author | Pascal Bellard <pascal.bellard@slitaz.org> |
---|---|
date | Sun Aug 26 00:21:22 2012 +0200 (2012-08-26) |
parents | |
children | e76c37f7bfe6 |
line diff
/*
 * unlzma.S (memtest/stuff) - compact LZMA decoder in x86 assembly.
 *
 * The C fragments kept in the // comments are the reference algorithm that
 * each group of instructions implements.
 *
 * Build variants:
 *   default : 16-bit registers, segmented pointers (ds:si in, es:di out)
 *   FLAT32  : 32-bit registers, flat pointers (esi in, edi out)
 *
 * The probability table (WS 16-bit entries) and the decoder locals live on
 * the stack: BP points at the first probability, locals are at negative
 * offsets from BP (see the symbol assignments below).
 */

// #define RC_NORMALIZE if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
//
// #define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
// #define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
// #define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
//
//#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
//  { UpdateBit0(p); mi <<= 1; A0; } else \
//  { UpdateBit1(p); mi = (mi + mi) + 1; A1; }
//
// #define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)
//
// #define RangeDecoderBitTreeDecode(probs, numLevels, res) \
//  { int i = numLevels; res = 1; \
//  do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
//  res -= (1 << numLevels); }
/*
 * Compression with : lzma e src dst -eos -pb2 -lp0 -lc3
 */

#define PROP_PB 2
#define PROP_LP 0
#define PROP_LC 3
#define PROPS (PROP_LC+(PROP_LP*9)+(PROP_PB*45))

// static const Byte *Buffer;
// static UInt32 bound, Code, Range;

/*
 * Buffer register DS:SI
 * all var based ws=ss:bp
 */

/*
 * Locals, as offsets from BP. They are created by pushing longs of value 1,
 * so every long starts as 1 and every byte that is not the low byte of a
 * long starts as 0 (state, posState and posState2 therefore start at 0).
 */
rep0		= -4		// long
rep1		= rep0-4	// long
rep2		= rep0-8	// long
rep3		= rep0-12	// long
state		= -17		// byte, 0..11
posState	= state-1	// byte, 0..15
posState2	= posState-1	// byte, 0..15
scratched	= rep0-16	// byte = 1
Code		= -24		// long
outStream	= -28		// long
nowPos		= outStream	// long
Range		= Code-8	// long
#define LOCALS	32

// int LzmaDecode(CLzmaDecoderState *vs,
//		const unsigned char *inStream,
//		unsigned char *outStream)
// {
//   CProb *p = vs->Probs;
//   SizeT nowPos = 0;
//   #define posStateMask = (1 << (vs->Properties.pb)) - 1;
//   #define literalPosMask = (1 << (vs->Properties.lp)) - 1;
//   int lc = vs->Properties.lc, state = 0, len = 0;
//   UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
//
//   {
//     UInt32 i, numProbs = Literal /*1846*/
//	+ ((UInt32)LZMA_LIT_SIZE /*768*/ << (lc + vs->Properties.lp));
//     for (i = 0; i < numProbs; i++) p[i] = kBitModelTotal /*2048*/ >> 1;

#define WS (1846+(768<<(PROP_LC+PROP_LP)))
#if (WS+WS+LOCALS) >= 65000
/* MAX WS = (1846+(768<<(8+4))) > 3MB! */
#error invalid (lc,lp,pb) : out of memory
#endif

ws1	= WS
ws2	= ws1*2
ws	= ws2+LOCALS+15

#ifndef FLAT32
#define	AX	%ax
#define	BX	%bx
#define	CX	%cx
#define	DX	%dx
#define	SI	%si
#define	DI	%di
#define	BP	%bp
#define	SP	%sp
#define CWD	cwd
#else
#define	AX	%eax
#define	BX	%ebx
#define	CX	%ecx
#define	DX	%edx
#define	SI	%esi
#define	DI	%edi
#define	BP	%ebp
#define	SP	%esp
#define CWD	cdq
#endif
/*
 * LzmaDecode:
#ifndef FLAT32
 *   input   ds:si=inStream, es:di=outStream
 *   output  outStream[], ds:si, es:di
	.code 16
#else
 *   input   esi=inStream, edi=outStream
 *   output  outStream[], esi, edi
	.code 32
#endif
 */

	// Build the probability table on the stack: ws1 words of 2048/2.
	mov	$ws1, CX
lzd1:
	pushw	$2048/2
	loop	lzd1
	mov	SP, BP			// BP = &p[0]; locals go below it
	// NOTE(review): this pushes ((LOCALS+3)/4)*2 = 16 longs (64 bytes),
	// twice LOCALS; the reason for the factor 2 is not evident here.
	movb	$((LOCALS+3)/4)*2, %cl
initlocals:
	pushl	$1
	loop	initlocals

#ifndef FLAT32
	// outStream = linear address of es:di (es*16 + di, 20 bits)
	movb	$4, %cl
	movw	%es, %bx
	shrw	%cl, %bx		// bh = es >> 12
	movw	%es, %dx
	shlw	%cl, %dx
	addw	%dx, %di
	movw	%di, outStream(%bp)
	adcb	%bh, outStream+2(%bp)
	incw	%cx			// cx = 5 : bytes to preload in setrep
#else
	movb	$5, %cl			// cx = 5 : bytes to preload in setrep
	movl	%edi, outStream(%ebp)
#endif

//   Byte previousByte = 0;
	xor	BX, BX

// #define RC_INIT(buffer)
//    Buffer = buffer; Code = 0; Range = 0xFFFFFFFF;
//    { int i; for(i=0; i<5; i++) { Code = (Code<<8) | RC_READ_BYTE; }}
//   }
//   RC_INIT(inStream);

	add	$13, SI			// skip header
setrep:
	// Each pass shifts Range and Code left by 8 and loads one byte into
	// Code; decb turns the new low byte of Range (0) into 0xFF, so Range
	// ends as 0xFFFFFFFF after the 5 passes.
	call	RC_LOAD_BYTE
	decb	Range(BP)
	loop	setrep

lzdmainlp:
//   while(1) {
//     CProb *prob;
//     int posState = (int)((nowPos) & posStateMask);
//
//     prob = p + IsMatch /*0*/ + (state << kNumPosBitsMax /*4*/) + posState;
//     if (Bit0(prob)) { /* char */

	xor	DX, DX
	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
	mov	$state, DI
	jc	lzdstring

//       prob = p + Literal /*1846*/ + (LZMA_LIT_SIZE /*768*/ *
//	 ((((nowPos) & literalPosMask) << lc) + (previousByte >> (8 - lc))));

#if PROP_LC != 0
	shrb	$8-PROP_LC, %bl
#endif

#if PROP_LP != 0
	// NOTE(review): the sum is left in DX, but the multiply below reads
	// BX and overwrites DX - confirm before enabling PROP_LP != 0.
	movb	posState2(BP), %dl
	shl	$PROP_LC, DX
	movb	$0, %bh
	add	BX, DX
#endif

	// AX is 0 here (Bit1 returned with carry clear), so AH=3 makes
	// AX = 768 = LZMA_LIT_SIZE and the multiply yields 768 * BX.
	movb	$3, %ah
	mul	BX			// DX:AX = 768 * BX
	add	$1846, AX

//       int symbol = 1;

	CWD
	inc	DX			// symbol = 1
	xchg	AX, CX			// save prob

//       if (state >= kNumLitStates /*7*/) { /* previous was string */
//       if (state < 4) state = 0;

lzd6z:
	subb	$3, (BP, DI)

//       if (state < 4) state = 0;

	jnc	lzd6
	movb	%dh, (BP, DI)		// %dh = 0

lzd6:
//       else if (state < 10) state -= 3;

	cmpb	$10-3, (BP, DI)

//       else state -= 6;

	jnb	lzd6z
	cmpb	$7-3-1, (BP, DI)
	jbe	lzd3

//	 int matchByte = outStream[nowPos - rep0];

	call	DicoRep02ESDI		// %bl = outStream[nowPos - rep0];

//	 do {
//	   int bit;
//	   CProb *probLit;
//	   matchByte <<= 1; bit = (matchByte & 0x100);

	movb	$1, %bh
lzd4:
	shlb	$1, %bl			// matchByte <<= 1
	sbb	DI, DI			// save bit=C

//	   probLit = prob + 0x100 + bit + symbol;

	mov	CX, AX			// restore prob
	adcb	%bh, %ah		// + bit + 0x100

//	   RC_GET_BIT2(probLit, symbol, if (bit) break, if (!bit) break)

	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
	rclb	$1, %dl			// symbol <<= 1; symbol |= C
	jc	lzd5			// if symbol >= 0x100
	cmp	DI, AX
	jz	lzd4			// if bit == Bit1(prob+%ax)

//	 } while (symbol < 0x100);
//       }
lzd3:
//       while (symbol < 0x100) {
//	 CProb *probLit = prob + symbol;
//	 RC_GET_BIT(probLit, symbol)
//       }

	// With BX = 0 the lzd4 loop adds neither 0x100 nor a match bit, so
	// it degenerates into the plain literal decoding loop.
	xor	BX, BX
	jmp	lzd4
lzd5:

//       outStream[nowPos++] = previousByte = (Byte)symbol;

	xchg	AX, DX
	call	outchar			// %bl = outStream[nowPos++] = %al;
	jmp	lzdmainlp

//     }

lzdstring:
	mov	$1, CX

//     else { /* string */
//       prob = p + IsRep /*192*/ + state;

	movb	$192, %dl
	addb	(BP, DI), %dl
	mov	$rep0, DI

//       if (Bit0(prob)) {

	call	Bit1dx			// Bit1(prob)
	jc	lzd8

//	 rep3 = rep2; rep2 = rep1; rep1 = rep0;
//	 state = (state < kNumLitStates /*7*/) ? 0 : 3;

	stc

//	 prob = p + LenCoder /*818*/;

	mov	$818, DX

//       }

	jmp	lzd11a

//       else {
lzd8:
//	 prob += kNumStates /*12*/;
//	 if (Bit0(prob)) {
	call	Bit1dx12		// prob += 12; Bit1(prob)
	jc	lzd11
//	   prob = p + IsRep0Long /*240*/ + (state << kNumPosBitsMax /*4*/)
//		+ posState;
	movb	$240, %dl		// dh=0

//	   if (Bit0(prob)) {

	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
	jc	lzd12

//	     // if (nowPos == 0) return LZMA_RESULT_DATA_ERROR;
//	     state = (state < kNumLitStates /*7*/) ? 9 : 11;

	movb	$9, %dl

//	     len++; goto string;
	jmp	lzd13string		// ax = 0
//	   }
//	 }
//	 else {
lzd11:
//	   UInt32 distance = rep1;
//	   prob += kNumStates /*12*/;
//	   if (!Bit0(prob)) {

	call	Bit1dx12		// prob += 12; Bit1(prob)
	jnc	lzd11z

//	     prob += kNumStates /*12*/;
//	     if (Bit0(prob)) distance = rep2;

	call	Bit1dx12		// prob += 12; Bit1(prob)
lzd11a:
	adcb	%cl, %cl

//	     else { distance = rep3; rep3 = rep2; }
//	     rep2 = rep1;
//	   }
//	   rep1 = rep0; rep0 = distance;

lzd11z:
	shl	$2, CX			// 8->32 bits
	sub	CX, DI			// &rep[cx]
	movl	(BP, DI), %eax
rotreplp:
	// Byte-wise copy of the reps one slot down; DI ends back at rep0.
	movb	4(BP, DI), %bl
	movb	%bl, (BP, DI)
	inc	DI
	loop	rotreplp
	testb	%dh, %dh
	jnz	lzd10
	movl	%eax, (BP, DI)

//	 }
lzd12:
//	 state = (state < kNumLitStates /*7*/) ? 8 : 11;

	movb	$0x08, %cl

//	 prob = p + RepLenCoder /*1332*/;

	mov	$1332, DX

//       }
lzd10:
	push	CX			// CX = 0

//       { /* get len */
//	 int numBits, offset;
//	 CProb *probLen = prob + LenChoice /*0*/;
//	 numBits = kLenNumLowBits /*3*/;

	movb	$8, %cl			// numBits : 3,3,8

//	 if (Bit0(probLen)) {

	call	Bit1dx			// Bit1(prob)
	xchg	AX, BX			// xchg/inc leave the carry untouched
	inc	DX
	jnc	lzd15			// bx=0

//	   probLen = prob + LenLow/*2*/ + (posState << kLenNumLowBits/*3*/);
//	   offset = 0;
//	 }
//	 else {
//	   probLen = prob + LenChoice2 /*1*/;

	call	Bit1dx			// Bit1(prob)
	add	AX, BX

#if PROP_PB != 0
	inc	AX			// ah=0
#endif
	jc	lzd16			// %ax=0, %bx=-2
lzd15:
#if PROP_PB != 0
	movb	$8, %al
	mulb	posState(BP)
#endif

//	   if (Bit0(probLen)) {
//	     probLen = prob + LenMid/*130*/ + (posState << kLenNumMidBits/*3*/);

	movb	$3, %cl			// numBits : 3,3,8
lzd16:
#if PROP_PB != 0
	add	$2-128-1, AX		// probLen : 2,130,258
#else
	mov	$2-128-1, AX		// probLen : 2,130,258
#endif
	add	DX, AX
	mov	$-8+1, DX		// offset  : 0,8,16
lzdargslp:
	add	$8, DX
	add	$128, AX
	inc	BX
	jle	lzdargslp		// leave with bx=1

//	     offset = kLenNumLowSymbols /*8*/;
//	     //numBits = kLenNumMidBits /*3*/;
//	   }
//	   else {
//	     probLen = prob + LenHigh /*258*/;
//	     offset = kLenNumLowSymbols /*8*/ + kLenNumMidSymbols /*8*/;
//	     numBits = kLenNumHighBits /*8*/;
//	   }
//	 }
//	 RangeDecoderBitTreeDecode(probLen, numBits, len); len += offset;

	push	DX
	call	RangeDecoder		// %ax=probs, %cx=numLevels, %ax=res
	pop	DX
	add	DX, AX			// offset
	pop	DX			// 0
lzd13string:
	push	AX

//	 state = (state < kNumLitStates /*7*/) ? dl : dl|3;

	movb	$7, %cl
	cmpb	%cl, state(BP)
	jb	new_state
	orb	$3, %dl
new_state:
	movb	%dl, state(BP)

//       } /* get len */
//       if (state < 4) {

	cmpb	$4-1, %dl
	ja	lzd19

//	 int posSlot;
//	 state += kNumLitStates /*7*/;

	addb	%cl, state(BP)

//	 prob = p + PosSlot /*432*/ + (((len < kNumLenToPosStates /*4*/) ?
//		len : kNumLenToPosStates - 1) << kNumPosSlotBits /*6*/);

	cmp	$4+1, AX
	jb	lzd21
	mov	$3+1, AX
	//??movb	$3+1, %al

lzd21:

	dec	CX			// cx = 6
	shl	%cl, AX
	add	$432-64, AX

//	 RangeDecoderBitTreeDecode(prob, kNumPosSlotBits /*6*/, posSlot);

	call	RangeDecoder		// %ax=probs, %cx=numLevels, %ax=res

//	 if (posSlot >= kStartPosModelIndex /*4*/) {
//	   int numDirectBits = ((posSlot >> 1) - 1);

#ifndef FLAT32
	movw	%cx, 2(%bp, %di)	// %cx = 0
#endif
	mov	AX, (BP, DI)
	mov	AX, CX
	// No size suffix: CX expands to %ecx under FLAT32, where an explicit
	// 'w' suffix would not match the register.
	shr	$1, CX
	dec	CX
	cmpb	$4, %al
	jb	lzd22

//	   rep0 = (2 | ((UInt32)posSlot & 1));

	andb	%bl, (BP, DI)		// %bx=1
	orb	$2, (BP, DI)

//	   if (posSlot < kEndPosModelIndex /*14*/) {

	cmpb	$14, %al
	jnb	lzd23

//	     rep0 <<= numDirectBits;

	neg	AX
	shll	%cl, (BP, DI)
	add	(BP, DI), AX

//	     prob = p + SpecPos /*688*/ + rep0 - posSlot - 1;

	add	$687, AX
	jmp	lzd24

//	   }
//	   else {
lzd23:
//	     numDirectBits -= kNumAlignBits /*4*/;
//	     do {
//	       RC_NORMALIZE; Range >>= 1; rep0 <<= 1;
//	       if (Code >= Range) { Code -= Range; rep0 |= 1; }

lzd23z:
	call	RC_NORMALIZE
	shrl	$1, Range(BP)
	movl	Range(BP), %eax
	cmpl	Code(BP), %eax
	ja	lzd25
	subl	%eax, Code(BP)
	stc
lzd25:
	rcll	$1, (BP, DI)

//	     } while (--numDirectBits != 0);

	cmp	$4+1, CX
	loopne	lzd23z

//	     prob = p + Align /* 802 */; numDirectBits = kNumAlignBits /*4*/;
//	     rep0 <<= numDirectBits;

	shll	%cl, (BP, DI)
	mov	$802, AX
//	   }

lzd24:
	call	RangeDecoder		// %ax=probs, %cx=numLevels, %ax=res

//	   {
//	     int i = 1, mi = 1;
//	     do {
//	       CProb *prob3 = prob + mi;
//	       RC_GET_BIT2(prob3, mi, ; , rep0 |= i);

	orb	%dh, (BP, DI)		// update rep0 with DirectBits

//	       i <<= 1;
//	     } while(--numDirectBits != 0);
//	   }
//	 } else rep0 = posSlot;
lzd22:
//	 if (++rep0 == (UInt32)(0)) break; /* EOF */

	incl	(BP, DI)

lzd19:
	// ZF comes either from the incl above (rep0 wrapped to 0 : EOF) or
	// from the "cmpb $4-1, %dl / ja lzd19" test (ZF clear); pop keeps it.
	pop	CX
	jz	lzdone

//       }
//       len += kMatchMinLen;/*2*/

	inc	CX

//     string: // if (rep0 > nowPos) return LZMA_RESULT_DATA_ERROR;
//       do {
lzd13z:
//	 previousByte = outStream[nowPos - rep0];
//	 outStream[nowPos++] = previousByte;

	call	outcharDico	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]

//       } while(--len != 0);

	loop	lzd13z

//     } /* char/string */
//   }

	jmp	lzdmainlp

lzdone:
//   //RC_NORMALIZE;
//   //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
//   return LZMA_RESULT_OK;
	call	Dico2ESDI		// set es & di (rep0 = 0)
	lea	ws2(BP), SP		// dealloc
	ret
// }

// al = outStream[nowPos - rep0];

/*
 * output  es:di, al
 * scratch bh, cl, flags
 */

DicoRep02ESDI:
	stc

// bl = outStream[nowPos];

/*
 * input   C set: address nowPos - rep0, C clear: address nowPos
 * output  es:di, bl
 * scratch bh, cl, flags
 */

Dico2ESDI:
#ifndef FLAT32
	movl	nowPos(%bp), %ebx
	jnc	Dico2ESDIz
	subl	rep0(%bp), %ebx
Dico2ESDIz:
	// Split the linear address: di = low 16 bits, es = bits 16+ << 12.
	movw	%bx, %di
	xorw	%bx, %bx
	shrl	$4, %ebx
	movw	%bx, %es
	movb	%es:(%di), %bl
#else
	// 32-bit frame pointer: (%bp) would be 16-bit addressing here and
	// would only use the low half of the frame pointer.
	movl	nowPos(%ebp), %edi
	jnc	Dico2ESDIz
	subl	rep0(%ebp), %edi
Dico2ESDIz:
	movb	(%edi), %bl
#endif
	ret

outcharDico:

// bl = outStream[nowPos++] = outStream[nowPos - rep0]

/*
 * output  es:di, bl
 * update  nowPos
 * scratch ax, dx, bh, cl, flags
 */

	call	DicoRep02ESDI		// %bl = outStream[nowPos - rep0]
	xchg	AX, BX
outchar:

// bl = outStream[nowPos++] = previousByte = al;

/*
 * output  bl
 * update  nowPos
 * scratch ax, dx, bh, di, cl, flags
 */

	clc
	call	Dico2ESDI
	stosb
	xchg	AX, BX			// previous byte

//	int posState = (int)((nowPos) & posStateMask);

#if PROP_PB != 0 && PROP_LP != 0
	// posState2 and posState are adjacent bytes: bump and mask both with
	// one word operation (low byte = lp mask, high byte = pb mask).
	addw	$0x0101, posState2(BP)
	andw	$(((1 << PROP_PB) -1)<<8)+((1 << PROP_LP) -1), posState2(BP)
#else
# if PROP_PB != 0
	incb	posState(BP)
	andb	$((1 << PROP_PB) -1), posState(BP)
# endif
# if PROP_LP != 0
	incb	posState2(BP)
	andb	$((1 << PROP_LP) -1), posState2(BP)
# endif
#endif
	incl	nowPos(BP)
	ret

//
// #define RC_NORMALIZE if (Range < kTopValue)
//    { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }

/*
 * update  Range, Code, ds:si
 * scratch flags
 */

RC_NORMALIZE:
	cmpb	$0, Range+3(BP)		// top byte non-zero : nothing to do
	jne	RC_NORMALIZE_1
RC_LOAD_BYTE:
	push	AX
	shll	$8, Range(BP)
	shll	$8, Code(BP)
#ifndef FLAT32
	// Once si has its top bit set, move one paragraph from si into ds
	// so that si does not wrap.
	testw	%si, %si
	jns	RC_READ_BYTE
	movw	%ds, %ax
	incw	%ax
	movw	%ax, %ds
	addw	$-16, %si
RC_READ_BYTE:
#endif
	lodsb
	movb	%al, Code(BP)
	pop	AX
RC_NORMALIZE_1:
	ret

// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)

/*
 * input   dx=base probability index
 * output  C, ax (see Bit1)
 */

Bit1state:
	movb	$16, %al
	mulb	state(BP)
# if PROP_PB != 0
	addb	posState(BP), %al
# endif
Bit1axdx:
	add	DX, AX
	jmp	Bit1

// prob += 12; Bit1(prob)

Bit1dx12:
	add	$12, DX
Bit1dx:
	mov	DX, AX

// static int Bit1(CProb *p)

Bit1:
/*
 * input   ax=p
 * output  C, ax
 * update  bound, Range, Code, ds:si
 * scratch flags
 */

// {
//   RC_NORMALIZE;

	call  RC_NORMALIZE		// preserves %ax, may update %si

	pushal				// FIXME pushaw?

	xchg	AX, DI
	add	DI, DI			// short *


//   bound = (Range>>kNumBitModelTotalBits /*11*/) * *(p);

	movl	Range(BP), %eax
	shrl	$11, %eax
	movzwl	(BP, DI), %edx
	mull	%edx

//   if (Code < bound) {

	cmpl	Code(BP), %eax
	jbe	Bit1_1

//    Range = bound;

	movl	%eax, Range(BP)

//    *(p) += (kBitModelTotal /*2048*/ - *(p)) >> kNumMoveBits /*5*/;

	movw	$2048, %ax

//    return 0;

	jmp	Bit1_2

//   }
//   else {

Bit1_1:

//    Range -= bound; Code -= bound;

	subl	%eax, Range(BP)
	subl	%eax, Code(BP)

//    *(p) -= (*(p)) >> kNumMoveBits /*5*/;

	movw	$31, %ax

//    return 1;

	stc
Bit1_2:
	// Shared update: *p += (ax - *p) >> 5 (arithmetic shift), with
	// ax = 2048 for bit 0 and ax = 31 for bit 1. The carry that holds
	// the decoded bit is kept across the update by pushf/popf.
	pushf
	subw	(BP, DI), %ax
	sarw	$5, %ax
	addw	%ax, (BP, DI)
	popf
	popal				// FIXME popaw?
	sbb	AX, AX			// ax = 0 (bit 0) or -1 (bit 1), C kept

//   }
// }

	ret

RangeDecoder:

/*
 * input   ax=probs cx=numLevels (< 8) bx=1
 * output  ax=res (backward), dh (forward)
 * update  bound, Range, Code, ds:si
 * scratch flags, cx=0, dl
 */
// NOTE(review): the length decoder above calls this with cx=8; in that case
// the "max" in bl wraps to 0 and al returns the low 8 bits of res.

	push	BX

//   { int i = numLevels; res = 1;
	mov	BX, DX			// res = 1

//   do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0);

RangeDecoder_1:
	push	AX
	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
	rclb	$1, %dl			// res <<= 1; res |= C
	andb	%bl, %al		// current bit
	orb	%al, %bh		// store in bh
	shlb	$1, %bl			// update max
	pop	AX
	loop	RangeDecoder_1

//   res -= (1 << numLevels); }

	xchg	AX, BX			// move bh to dh
	xchg	AX, DX			// and dl to al
	sub	%dl, %al		// sub max
	pop	BX
	ret