wok-tiny rev 179

linux: rewrite stuff/pack
author Pascal Bellard <pascal.bellard@slitaz.org>
date Tue Aug 15 09:20:55 2023 +0000 (8 months ago)
parents 5e2b41f82f13
children e05af9592e8b
files linux/receipt linux/stuff/pack linux/stuff/unlz4.S linux/stuff/unlzma.S linux/stuff/unlzsa1.S linux/stuff/unlzsa2.S linux/stuff/unpack.S linux/stuff/unpacklz4.S linux/stuff/unzx0.S
line diff
     1.1 --- a/linux/receipt	Sun Aug 13 19:51:00 2023 +0000
     1.2 +++ b/linux/receipt	Tue Aug 15 09:20:55 2023 +0000
     1.3 @@ -53,8 +53,7 @@
     1.4  	S2bin bootloader
     1.5  	S2bin helper
     1.6  	S2bin unpacklz4
     1.7 -	cp $stuff/pack .
     1.8 -	sh ./pack --build
     1.9 +	cp $stuff/pack $stuff/un*.S .
    1.10  
    1.11  	sed -i '/config %config:/{NNNps/config %config:/%config:/}' Makefile &&
    1.12  	sed -i 's/^config %config:/config:/' Makefile
    1.13 @@ -69,6 +68,7 @@
    1.14  	yes '' | make ARCH=i386 HOSTCC=gcc config
    1.15  	make ARCH=i386 CC=gcc HOSTCC=gcc -j 4 bzImage
    1.16  	cp arch/i386/boot/bzImage arch/i386/boot/bzImage.made.386
    1.17 +	sh ./pack --build arch/i386/boot/bzImage.made.386
    1.18  	
    1.19  	grep -q "CONFIG_MODULES=y" .config &&
    1.20  	make ARCH=i386 CC=gcc HOSTCC=gcc -j 4 modules &&
    1.21 @@ -97,12 +97,12 @@
    1.22  	mkdir $fs/boot
    1.23  	cp -a $src/pack $fs/boot/
    1.24  	cp -a $src/bundle.sh $fs/boot/bundle
    1.25 -	cp -a $src/editcmdline.bin $fs/boot/cmdline.bin
    1.26 +	cp -a $src/cmdline.bin $fs/boot/
    1.27 +	cp -a $src/editcmdline.bin $fs/boot/
    1.28  	for i in '' .386 ; do
    1.29  		[ -s $src/arch/i386/boot/bzImage$i ] || continue
    1.30  		cp -a $src/arch/i386/boot/bzImage$i $fs/boot/
    1.31 -		dd if=$src/bootloader.bin of=$fs/boot/bzImage$i conv=notrunc
    1.32 -		dd if=$src/cmdline.bin bs=1 seek=$(setup_code $fs/boot/bzImage$i) of=$fs/boot/bzImage$i conv=notrunc
    1.33 +		dd if=$src/bootloader.bin of=$fs/boot/bzImage$i conv=notrunc 2> /dev/null
    1.34  		cp -a $src/System.map$i $fs/boot/
    1.35  		cp -a $src/.config$i $fs/boot/config$i
    1.36  	done
    1.37 @@ -131,9 +131,10 @@
    1.38  	for i in $1/boot/*.386 ; do
    1.39  		[ "$CPU586" = "yes" ] && rm -f $i || mv -f $i ${i%.386}
    1.40  	done 2> /dev/null
    1.41 +	[ -s $1/boot/cmdline ] && cat $1/boot/cmdline | sed 's/^ *//' | \
    1.42 +		dd of=$1/boot/bzImage conv=notrunc bs=1 seek=768 count=3072 &&
    1.43 +		dd if=$1/boot/cmdline.bin bs=1 seek=$(setup_code $1/boot/bzImage) of=$1/boot/bzImage conv=notrunc
    1.44  	[ "$EDIT_CMDLINE" ] &&
    1.45 -		dd if=$1/boot/cmdline.bin bs=1 seek=$(setup_code $1/boot/bzImage) of=$1/boot/bzImage conv=notrunc
    1.46 -	[ -s $1/boot/cmdline ] && cat $1/boot/cmdline | sed 's/^ *//' | \
    1.47 -		dd of=$1/boot/bzImage conv=notrunc bs=1 seek=768 count=3072
    1.48 +		dd if=$1/boot/editcmdline.bin bs=1 seek=$(setup_code $1/boot/bzImage) of=$1/boot/bzImage conv=notrunc
    1.49  	$1/boot/pack $1/boot/bzImage
    1.50  }
     2.1 --- a/linux/stuff/pack	Sun Aug 13 19:51:00 2023 +0000
     2.2 +++ b/linux/stuff/pack	Tue Aug 15 09:20:55 2023 +0000
     2.3 @@ -1,98 +1,111 @@
     2.4  #!/bin/sh
     2.5  
     2.6 +get()
     2.7 +{
     2.8 +	echo $(od -j $(($1)) -N ${3:-2} -t u${3:-2} -An "$2")
     2.9 +}
    2.10 +
    2.11  if [ "$1" = "--build" ]; then	#install-begin
    2.12 -	while read type pack args; do
    2.13 -		grep -q "#define COMPRESSION.*$type" unpacklz4.S || continue
    2.14 -		sed -i "s/COMPRESSOR/$pack/;s/COMPRESSION/$pack $args/" $0
    2.15 -	done <<EOT
    2.16 -LZMA	lzma	-si -so $(sed '/^#define PROP_/!d;s|.*_|-|;s| *||;y/PBLPC/pblpc/' unlzma.S) | ddq bs=13 skip=1
    2.17 -LZSA2	lzsa	-r -f 2
    2.18 -LZSA1	lzsa	-r -f 1
    2.19 -LZ4	lz4	-l -12 | ddq bs=4 skip=1
    2.20 -EOT
    2.21 -	uuencode -m - < unpacklz4.bin | sed -e '/^unpacklz4\.bin$/r/dev/stdin' \
    2.22 -		-e '/^unpacklz4\.bin$/d' -i $0
    2.23 -	cat helper.bin 2>/dev/null | uuencode -m - | sed -e \
    2.24 -		'/^helper\.bin$/r/dev/stdin' -e '/^helper\.bin$/d' -i $0
    2.25 -	sed '/install-begin$/,/install-end$/d' -i $0
    2.26 -	exit
    2.27 +	header=$(($(get 0x201 $2 1)+2))
    2.28 +	sed "s|OFS|$((header+0x200))|g" -i $0
    2.29 +	for zip in zx0 lzma lz4 lzsa1 lzsa2 ; do
    2.30 +		ZIP=$(echo $zip | sed 'y/almsxz/ALMSXZ/')
    2.31 +		sed "/endzip:/ {
    2.32 +  h
    2.33 +  r un$zip.S
    2.34 +  g ; N
    2.35 +}" unpack.S > unpack$zip.S
    2.36 +		top=$((0x7900))
    2.37 +		[ "$zip" = "lzma" ] && top=$((0x3900))
    2.38 +		# cc -DTOP=0x7900 [-DSAVEREGS] [-DHEADER=0x38|-DTARGET=[0|0x1F2|0x200]] -o unpack.o -Wa,-a=unpack.lst -c unpack.S
    2.39 +#		cc -DTOP=$top -DSAVEREGS -DHEADER=$header -o un$zip.o -Wa,-a=un$zip.lst -c unpack$zip.S
    2.40 +		cc -DTOP=$top -DHEADER=$header -o un$zip.o -Wa,-a=un$zip.lst -c unpack$zip.S
    2.41 +		objcopy --only-section=.text -O binary un$zip.o un$zip.bin
    2.42 +		uuencode -m - < un$zip.bin | sed -e "/^${ZIP}_CODE\$/r/dev/stdin" -e "/^${ZIP}_CODE\$/d" -i $0
    2.43 +		start=$((0x$(sed '/start$/!d;s|.*text:00000||;s| .*||' un$zip.lst)))
    2.44 +		moved=$((0x$(sed '/moved$/!d;s|.*text:0*||;s| .*||' un$zip.lst)))
    2.45 +		sed "s|${ZIP}_START|$start|;s|${ZIP}_MOVED|$moved|" -i $0
    2.46 +	done
    2.47 +	sed '/install-begin$/,/install-end$/d' -i $0 ; exit
    2.48  fi	#install-end
    2.49  ddq()
    2.50  {
    2.51  	dd "$@" 2> /dev/null
    2.52  }
    2.53  
    2.54 -word()
    2.55 +patch()
    2.56  {
    2.57 -	n=$1; for i in $(seq 1 1 ${2:-2}); do
    2.58 -		printf '\\\\x%02X' $(($n & 255))
    2.59 -		n=$(($n >> 8))
    2.60 -	done | xargs echo -en
    2.61 +	xargs echo -en | ddq bs=1 seek=$2 conv=notrunc of=$1
    2.62  }
    2.63  
    2.64 -store()
    2.65 +add()
    2.66  {
    2.67 -	word $2 "$4" | ddq bs=1 conv=notrunc of="$3" seek=$(($1))
    2.68 +	local tmp=$(($1+$(get $2 $3)))
    2.69 +	printf '\\\\x%02X\\\\x%02X' $((tmp&255)) $((tmp/256)) | patch /tmp/bz$$name $2
    2.70  }
    2.71  
    2.72 -get()
    2.73 -{
    2.74 -	echo $(od -j $(($1)) -N ${3:-2} -t u${3:-2} -An "$2")
    2.75 -}
    2.76 +[ ! -s "$1" ] && echo "Usage: $0 bzImage [salvador|lzma|lz4|lzsa1|lzsa2|zx0]" && exit 1
    2.77  
    2.78 -helper()
    2.79 -{
    2.80 -uudecode << EOT
    2.81 -helper.bin
    2.82 +linux="$1"
    2.83 +zipper="$2"
    2.84 +setup=$(get 0x1F1 $linux 1)
    2.85 +
    2.86 +while read zip start moved cmd ; do
    2.87 +	[ "$zipper" ] && [ "$zip" != "$zipper" ] && continue
    2.88 +	which ${cmd%% *} || continue
    2.89 +	case "$zip" in
    2.90 +	salvador|zx0)
    2.91 +		uudecode - > /tmp/unzip$$ <<EOT
    2.92 +ZX0_CODE
    2.93  EOT
    2.94 -}
    2.95 +		;;
    2.96 +	lzma|xz) continue # use too much memory ?
    2.97 +		uudecode - > /tmp/unzip$$ <<EOT
    2.98 +LZMA_CODE
    2.99 +EOT
   2.100 +		;;
   2.101 +	lz4)
   2.102 +		uudecode - > /tmp/unzip$$ <<EOT
   2.103 +LZ4_CODE
   2.104 +EOT
   2.105 +		;;
   2.106 +	lzsa1)
   2.107 +		uudecode - > /tmp/unzip$$ <<EOT
   2.108 +LZSA1_CODE
   2.109 +EOT
   2.110 +		;;
   2.111 +	lzsa2)
   2.112 +		uudecode - > /tmp/unzip$$ <<EOT
   2.113 +LZSA2_CODE
   2.114 +EOT
   2.115 +	 	;;
   2.116 +	*)	continue
   2.117 +	esac
   2.118 +	ddq if=$linux bs=1 count=OFS of=/tmp/bz$$name
   2.119 +	ddq if=$linux bs=1 skip=OFS count=$(($setup*512+512-OFS)) of=/tmp/bz$$
   2.120 +	${cmd%|*} /tmp/bz$$ /tmp/bz$$.z
   2.121 +	[ "${cmd%|*}" != "${cmd#*|}" ] && ${cmd#*|} < /tmp/bz$$.z > /tmp/bz$$.zz && mv /tmp/bz$$.zz /tmp/bz$$.z
   2.122 +	sz1=$(stat -c %s /tmp/bz$$.z)
   2.123 +	sz2=$(stat -c %s /tmp/unzip$$)
   2.124 +	ddq if=/tmp/unzip$$ bs=1 skip=$start count=$((moved-start)) >> /tmp/bz$$name
   2.125 +	add $sz1 $((start+2+0x200)) /tmp/bz$$name
   2.126 +	add $sz1 $((start+5+0x200)) /tmp/bz$$name
   2.127 +	cat /tmp/bz$$.z >> /tmp/bz$$name
   2.128 +	ddq if=/tmp/unzip$$ bs=1 skip=$moved >> /tmp/bz$$name
   2.129 +	x=$(((sz1+sz2+0x1FF)&0xFE00))
   2.130 +	printf '\\\\x%02X' $((x/512)) | patch /tmp/bz$$name 497
   2.131 +	ddq if=/dev/zero bs=1 count=$((x-sz1-sz2)) >> /tmp/bz$$name
   2.132 +	ddq if=$linux bs=512 skip=$((1+setup)) >> /tmp/bz$$name
   2.133  
   2.134 -[ ! -s "$1" ] && echo "Usage: $0 bzImage" && exit 1
   2.135 +	mv /tmp/bz$$name $linux
   2.136 +	rm /tmp/bz$$ /tmp/bz$$.z
   2.137 +	break
   2.138 +done <<EOT
   2.139 +salvador	ZX0_START	ZX0_MOVED	salvador
   2.140 +zx0		ZX0_START	ZX0_MOVED	zx0
   2.141 +lzsa2		LZSA2_START	LZSA2_MOVED	lzsa -r -f2
   2.142 +lzsa1		LZSA1_START	LZSA1_MOVED	lzsa -r -f1
   2.143 +lz4		LZ4_START	LZ4_MOVED	lz4 -l -12 | ddq bs=4 skip=1
   2.144 +lzma		LZMA_START	LZMA_MOVED	lzma e | ddq bs=13 skip=1
   2.145 +EOT
   2.146  
   2.147 -if which COMPRESSOR > /dev/null ; then
   2.148 -
   2.149 -# boot + head param
   2.150 -param=$((514+$(get 0x201 $1 1)))
   2.151 -ddq if=$1 bs=1 count=$param > /tmp/setup$$
   2.152 -
   2.153 -# unlz4 + data
   2.154 -uudecode >> /tmp/setup$$ << EOT
   2.155 -unpacklz4.bin
   2.156 -EOT
   2.157 -setupsz=$(get 0x1F1 $1 1)
   2.158 -ddq if=$1 bs=1 skip=$param count=$(($setupsz*512-$param+512)) | \
   2.159 -	COMPRESSION >> /tmp/setup$$
   2.160 -
   2.161 -# version string
   2.162 -word $(get 0x20E $1) >> /tmp/setup$$
   2.163 -word $setupsz 1 >> /tmp/setup$$
   2.164 -setupend=$(stat -Lc %s "/tmp/setup$$")
   2.165 -versionofs=
   2.166 -version="$(ddq if=$1 bs=1 skip=$((0x200+$(get 0x20E $1))) | strings | sed q)"
   2.167 -if grep --help 2>&1 | grep -q byte-offset; then
   2.168 -	versionofs=$(grep -obaF "$version" /tmp/setup$$ | sed 's/:.*//')
   2.169 -fi
   2.170 -if [ -z "$versionofs" ]; then
   2.171 -	versionofs=$(stat -c %s /tmp/setup$$)
   2.172 -	echo -en "$version\0" >> /tmp/setup$$
   2.173 -fi
   2.174 -store 0x20E $(($versionofs-512)) /tmp/setup$$
   2.175 -
   2.176 -helpersz=$(helper | wc -c)
   2.177 -newsetupsz=$((($(stat -c %s /tmp/setup$$)+$helpersz-1)/512))
   2.178 -[ $newsetupsz -lt 4 ] && newsetupsz=4
   2.179 -
   2.180 -if [ $newsetupsz -lt $setupsz ]; then
   2.181 -	while [ $(($setupend + $helpersz)) -gt $((512 + 512*$newsetupsz)) ]; do
   2.182 -		newsetupsz=$(($newsetupsz+1))
   2.183 -	done
   2.184 -#	ddq of=/tmp/setup$$ bs=512 seek=$(($newsetupsz+1)) count=0
   2.185 -	store 0x1F1 $newsetupsz /tmp/setup$$ 1
   2.186 -	helper | ddq bs=1 of=/tmp/setup$$ \
   2.187 -		seek=$((512 + 512*$newsetupsz - $helpersz))
   2.188 -	ddq if=$1 bs=512 skip=$((1+$setupsz)) >> /tmp/setup$$
   2.189 -	cp /tmp/setup$$ $1
   2.190 -fi
   2.191 -rm -f /tmp/setup$$
   2.192 -
   2.193 -fi
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/linux/stuff/unlz4.S	Tue Aug 15 09:20:55 2023 +0000
     3.3 @@ -0,0 +1,230 @@
     3.4 +// Lz4Decode:
     3.5 +#ifndef FLAT32
     3.6 +//   input   ds:si=inStream, es:di=outStream
     3.7 +//   output  outStream[], ds:si, es:di
     3.8 +	.code16
     3.9 +#define AX	%ax
    3.10 +#define BX	%bx
    3.11 +#define CX	%cx
    3.12 +#define SI	%si
    3.13 +#define DI	%di
    3.14 +#else
    3.15 +//   input   esi=inStream, edi=outStream
    3.16 +//   output  outStream[], ds:esi, es:edi
    3.17 +	.code32
    3.18 +#define AX	%eax
    3.19 +#define BX	%ebx
    3.20 +#define CX	%ecx
    3.21 +#define SI	%esi
    3.22 +#define DI	%edi
    3.23 +#endif
    3.24 + 
    3.25 +#if defined(FLAT16OUT) || defined(ONLY8086)
    3.26 +#define cAX	%ax
    3.27 +#define cCX	%cx
    3.28 +#define cDX	%dx
    3.29 +#else
    3.30 +#define cAX	%eax
    3.31 +#define cCX	%ecx
    3.32 +#define cDX	%edx
    3.33 +#endif
    3.34 +
    3.35 +#define ARCHIVE_MAGICNUMBER 0x184C2102
    3.36 +
    3.37 +//#define PARANOIA			// cover rare cases, optional
    3.38 +
    3.39 +lz4main:
    3.40 +#ifdef PARANOIA
    3.41 +	cld
    3.42 +# if !defined(FLAT32) && !defined(FLAT16OUT)
    3.43 +	xorl	%cx, %cx
    3.44 +	call	lz4mov
    3.45 +# endif
    3.46 +#endif
    3.47 +#if defined(FLAT16OUT) || defined(ONLY8086)
    3.48 +	lodsw			// get chunkSize
    3.49 +# ifndef NO_LZ4_HEADER
    3.50 +	cmpw	$ARCHIVE_MAGICNUMBER&0xFFFF, %ax
    3.51 +# endif
    3.52 +	xchg	%ax, %dx
    3.53 +	lodsw
    3.54 +# ifndef NO_LZ4_HEADER
    3.55 +	jne	chkeof
    3.56 +	cmpw	$ARCHIVE_MAGICNUMBER>>16, %ax
    3.57 +	je	lz4main
    3.58 +chkeof:	
    3.59 +# endif
    3.60 +# ifdef PARANOIA
    3.61 +	orw	%ax, %dx	// end of file ?
    3.62 +	je	lz4quit
    3.63 +# endif
    3.64 +# ifndef FLAT16OUT
    3.65 +	xchgw	%ax, %bp
    3.66 +	subw	$1, %dx
    3.67 +	sbbw	$0, %bp
    3.68 +# endif
    3.69 +#else
    3.70 +	lodsl			// get chunkSize
    3.71 +# ifndef NO_LZ4_HEADER
    3.72 +	cmpl	$ARCHIVE_MAGICNUMBER, %eax
    3.73 +	je	lz4main
    3.74 +# endif
    3.75 +# ifdef PARANOIA
    3.76 +	orl	%eax, %eax	// end of file ?
    3.77 +	je	lz4quit
    3.78 +# endif
    3.79 +	xchgl	%eax, %edx
    3.80 +#endif
    3.81 +lz4chunk:			// uncompress chunk
    3.82 +	lodsb			// get token
    3.83 +	pushw	%ax
    3.84 +#ifdef ONLY8086
    3.85 +	movb	$4, %cl
    3.86 +	shrb	%cl, %al
    3.87 +#else
    3.88 +	shrb	$4, %al
    3.89 +#endif
    3.90 +	call	lz4len		// get literal length
    3.91 +#if !defined(FLAT16OUT) && defined(ONLY8086)
    3.92 +	subw	%cx, %dx	// count literal
    3.93 +	sbbw	$0, %bp
    3.94 +#else
    3.95 +	sub	cCX, cDX	// count literal
    3.96 +#endif
    3.97 +#if !defined(FLAT32) && !defined(FLAT16OUT) && (!defined(FLAT16) || !defined(PARANOIA))
    3.98 +#define NeedLz4mov
    3.99 +	call	lz4mov		// copy literals
   3.100 +#else
   3.101 +	rep movsb
   3.102 +#endif
   3.103 +	popw	%bx
   3.104 +#if !defined(FLAT16OUT) && defined(ONLY8086)
   3.105 +	subw	$1+2, %dx	// count token & string address
   3.106 +	sbbw	$0, %bp
   3.107 +	jb	lz4quit
   3.108 +lz4cont:
   3.109 +#else
   3.110 +	sub	$1+2, cDX	// count token & string address
   3.111 +	jbe	lz4quit
   3.112 +#endif
   3.113 +	lodsw			// get string address
   3.114 +	xchg	AX, BX
   3.115 +	call	lz4len		// get string length
   3.116 +	add	$4, CX
   3.117 +#if !defined(FLAT32) && !defined(FLAT16OUT)
   3.118 +	pushw	%ds
   3.119 +	pushw	%si
   3.120 +	movw	%di, %si
   3.121 +	subw	%bx, %si
   3.122 +	movw	%es, %ax
   3.123 +	jnc	axok
   3.124 +	subb	$0x10, %ah
   3.125 +axok:
   3.126 +.macro norm	reg
   3.127 +	movw	%si, \reg
   3.128 +	andw	$0xF, %si
   3.129 +# ifdef ONLY8086
   3.130 +	pushw	%cx
   3.131 +	movb	$4, %cl
   3.132 +	shrw	%cl, \reg
   3.133 +# else
   3.134 +	shrw	$4, \reg
   3.135 +# endif
   3.136 +	addw	\reg, %ax
   3.137 +	movw	%ax, %ds
   3.138 +	movw	%di, \reg
   3.139 +	andw	$0xF, %di
   3.140 +# ifdef ONLY8086
   3.141 +	shrw	%cl, \reg
   3.142 +	popw	%cx
   3.143 +# else
   3.144 +	shrw	$4, \reg
   3.145 +# endif
   3.146 +	movw	%es, %ax
   3.147 +	addw	\reg, %ax
   3.148 +	movw	%ax, %es
   3.149 +.endm 
   3.150 +# if !defined (NeedLz4mov)
   3.151 +	norm	%bx
   3.152 +	rep movsb
   3.153 +# else
   3.154 +	call	lz4movStr	// copy string
   3.155 +# endif
   3.156 +	popw	%si
   3.157 +	popw	%ds
   3.158 +#else
   3.159 +# ifdef FASTFILL
   3.160 +	cmp	$FASTFILL,BX
   3.161 +	jbe	lz4fast
   3.162 +# endif
   3.163 +lz4notfast:
   3.164 +	xchg	AX, SI
   3.165 +	mov	DI, SI
   3.166 +	sub	BX, SI
   3.167 +# ifdef ONLY8086
   3.168 +lz4movsb:
   3.169 +	movsb	%es:(SI), %es:(DI)	//  NMOS 8088/8086 workaround.
   3.170 +	loop	lz4movsb
   3.171 +# else
   3.172 +	rep movsb %es:(SI), %es:(DI)
   3.173 +# endif
   3.174 +	xchg	AX, SI
   3.175 +# ifdef FASTFILL
   3.176 +lz4fast:
   3.177 +#  if FASTFILL == 1
   3.178 +	movb	%es:-1(DI), %al
   3.179 +	rep stosb
   3.180 +#  endif
   3.181 +#  if FASTFILL >= 2
   3.182 +	movw	%es:-2(DI), %ax
   3.183 +	je	lz4fastword
   3.184 +	movb	%al, %ah
   3.185 +lz4fastword:
   3.186 +	shr	$1, CX
   3.187 +	rep stosw
   3.188 +	jnc	lz4chunk
   3.189 +	stosb
   3.190 +#  endif
   3.191 +# endif
   3.192 +	jmp	lz4chunk
   3.193 +#endif
   3.194 +
   3.195 +#if defined(NeedLz4mov)
   3.196 +# if defined(PARANOIA)
   3.197 +lz4movlp:
   3.198 +	xchgw	%ax, %cx
   3.199 +	movw	$0x7800, %cx
   3.200 +	rep movsw
   3.201 +	xchgw	%ax, %cx
   3.202 +	sub	$0xF0, %ch
   3.203 +# endif
   3.204 +lz4mov:
   3.205 +	movw	%ds, %ax
   3.206 +lz4movStr:
   3.207 +	norm	%bx
   3.208 +# if defined(PARANOIA)
   3.209 +	cmp	$0xFF, %ch	// catch FFFX case
   3.210 +	jz	lz4movlp
   3.211 +# endif
   3.212 +	rep movsb
   3.213 +	ret
   3.214 +#endif
   3.215 +
   3.216 +lz4len:				// get length in %ecx
   3.217 +	and	$0xF, cAX
   3.218 +	mov	cAX, cCX
   3.219 +	cmpb	$0xF, %al
   3.220 +	jne	lz4quit
   3.221 +lz4len2:
   3.222 +	lodsb
   3.223 +#if !defined(FLAT16OUT) && defined(ONLY8086)
   3.224 +	subw	$1, %dx		// remaining chunk size
   3.225 +	sbbw	$0, %bp
   3.226 +#else
   3.227 +	dec	cDX		// remaining chunk size
   3.228 +#endif
   3.229 +	add	AX, CX
   3.230 +	cmpb	$0xFF, %al
   3.231 +	je	lz4len2
   3.232 +lz4quit:
   3.233 +	ret
     4.1 --- a/linux/stuff/unlzma.S	Sun Aug 13 19:51:00 2023 +0000
     4.2 +++ b/linux/stuff/unlzma.S	Tue Aug 15 09:20:55 2023 +0000
     4.3 @@ -97,11 +97,11 @@
     4.4  #ifndef FLAT32
     4.5   *   input   ds:si=inStream, es:di=outStream
     4.6   *   output  outStream[], ds:si, es:di
     4.7 - 	.code16
     4.8 + 	.code 16
     4.9  #else
    4.10   *   input   esi=inStream, edi=outStream
    4.11   *   output  outStream[], esi, edi
    4.12 - 	.code32
    4.13 + 	.code 32
    4.14  #endif
    4.15   */
    4.16   
    4.17 @@ -252,13 +252,7 @@
    4.18  	
    4.19  //         int matchByte = outStream[nowPos - rep0];
    4.20  
    4.21 -#if defined(FLAT32) || defined(FLAT16OUT)
    4.22 -	mov	nowPos(BP), BX
    4.23 -	sub	rep0(BP), BX
    4.24 -	movb	(BX), %bl
    4.25 -#else
    4.26  	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0];
    4.27 -#endif
    4.28  	
    4.29  //         do {
    4.30  //           int bit;
    4.31 @@ -674,26 +668,12 @@
    4.32  lzd13z:
    4.33  //         previousByte = outStream[nowPos - rep0];
    4.34  //         outStream[nowPos++] = previousByte;
    4.35 +
    4.36 +	call	outcharDico 	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]
    4.37 +
    4.38  //       } while(--len != 0);
    4.39  
    4.40 -#if defined(FLAT32) || defined(FLAT16OUT)
    4.41 -	push	SI
    4.42 -	mov	nowPos(BP), SI
    4.43 -	sub	rep0(BP), SI
    4.44 -lzd13zflat:
    4.45 -# ifdef FLAT32
    4.46 -	lodsb	%ds:(SI)
    4.47 -# else
    4.48 -	lodsb	%es:(SI)
    4.49 -# endif
    4.50 -	call	outchar
    4.51 -	loop	lzd13zflat
    4.52 -	pop	SI
    4.53 -#else
    4.54 -	call	outcharDico 	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]
    4.55  	loop	lzd13z
    4.56 -#endif
    4.57 -
    4.58  
    4.59  //     } /* char/string */
    4.60  //   }
    4.61 @@ -704,18 +684,13 @@
    4.62  //   //RC_NORMALIZE;
    4.63  //   //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
    4.64  //   return LZMA_RESULT_OK;
    4.65 -#if !defined(FLAT32) && !defined(FLAT16OUT)
    4.66  	call	Dico2ESDI	// set es & di (rep0 = 0)
    4.67 -#else
    4.68 -	movw	nowPos(BP), DI
    4.69 -#endif
    4.70  	lea	ws2(BP), SP	// dealloc
    4.71  	ret	
    4.72  // }
    4.73  
    4.74  // al = outStream[nowPos - rep0];
    4.75  
    4.76 -#if !defined(FLAT32) && !defined(FLAT16OUT)
    4.77  /*
    4.78   * output  es:di, al
    4.79   * scratch bh, cl, flags
    4.80 @@ -732,6 +707,7 @@
    4.81   */
    4.82   
    4.83  Dico2ESDI:
    4.84 +#if !defined(FLAT32) && !defined(FLAT16OUT)
    4.85  # ifdef ONLY8086
    4.86  	pushw	%ax
    4.87  	movw	nowPos(%bp), %bx
    4.88 @@ -759,6 +735,17 @@
    4.89  	shrl	$4, %ebx
    4.90  # endif
    4.91  	movw	%bx, %es
    4.92 +#else
    4.93 +	mov	nowPos(BP), DI
    4.94 +	jnc	Dico2ESDIz
    4.95 +	sub	rep0(BP), DI
    4.96 +Dico2ESDIz:
    4.97 +#endif
    4.98 +#ifdef FLAT32
    4.99 +	movb	(DI), %bl
   4.100 +#else
   4.101 +	movb	%es:(%di), %bl
   4.102 +#endif
   4.103  	ret
   4.104  
   4.105  outcharDico:
   4.106 @@ -786,12 +773,6 @@
   4.107  	clc
   4.108  	call	Dico2ESDI
   4.109  	stosb
   4.110 -#else
   4.111 -outchar:
   4.112 -	movw	nowPos(%bp), DI
   4.113 -	stosb
   4.114 -	movw	DI, nowPos(%bp)
   4.115 -#endif
   4.116  	xchg	AX, BX		// previous byte
   4.117  
   4.118  //	int posState = (int)((nowPos) & posStateMask);
   4.119 @@ -809,13 +790,13 @@
   4.120  	andb	$((1 << PROP_LP) -1), posState2(BP)
   4.121  # endif
   4.122  #endif
   4.123 -#if !defined(FLAT32) && !defined(FLAT16OUT)
   4.124 -	inc	nowPos(BP)
   4.125 -# if defined(ONLY8086)
   4.126 +#ifdef ONLY8086
   4.127 +	incw	nowPos(BP)
   4.128  	jnz	incnowPosDone
   4.129  	incw	nowPos+2(BP)
   4.130  incnowPosDone:
   4.131 -# endif
   4.132 +#else
   4.133 +	incl	nowPos(BP)
   4.134  #endif
   4.135  	ret
   4.136  
     5.1 --- a/linux/stuff/unlzsa1.S	Sun Aug 13 19:51:00 2023 +0000
     5.2 +++ b/linux/stuff/unlzsa1.S	Tue Aug 15 09:20:55 2023 +0000
     5.3 @@ -1,143 +1,211 @@
     5.4 -//  based on
     5.5 -//  decompress_small.S - space-efficient decompressor implementation for 8088
     5.6 -//
     5.7 -//  Copyright (C) 2019 Emmanuel Marty
     5.8 -//
     5.9 -//  This software is provided 'as-is', without any express or implied
    5.10 -//  warranty.  In no event will the authors be held liable for any damages
    5.11 -//  arising from the use of this software.
    5.12 -//
    5.13 -//  Permission is granted to anyone to use this software for any purpose,
    5.14 -//  including commercial applications, and to alter it and redistribute it
    5.15 -//  freely, subject to the following restrictions:
    5.16 -//
    5.17 -//  1. The origin of this software must not be misrepresented; you must not
    5.18 -//     claim that you wrote the original software. If you use this software
    5.19 -//     in a product, an acknowledgment in the product documentation would be
    5.20 -//     appreciated but is not required.
    5.21 -//  2. Altered source versions must be plainly marked as such, and must not be
    5.22 -//     misrepresented as being the original software.
    5.23 -//  3. This notice may not be removed or altered from any source distribution.
    5.24 -
    5.25 -//  ---------------------------------------------------------------------------
    5.26 -//  Decompress raw LZSA1 block
    5.27 -//  inputs:
    5.28 -//  * %ds:%si: raw LZSA1 block
    5.29 -//  * %es:%di: output buffer
    5.30 -//  ---------------------------------------------------------------------------
    5.31 -
    5.32 +// Lzsa1Decode:
    5.33 +#ifndef FLAT32
    5.34 +//   input   ds:si=inStream, es:di=outStream
    5.35 +//   output  outStream[], ds:si, es:di
    5.36  	.code16
    5.37 -lzsa1_decompress:
    5.38 -	//pushw	%di		// remember decompression offset
    5.39 -	//cld			// make string operations (lods, movs, stos..) move forward
    5.40 -
    5.41 -lzsa1_decode_token:
    5.42 -	xorb	%ah, %ah	// clear %ah
    5.43 -	lodsb			// read token byte: O|LLL|MMMM
    5.44 -	movw	%ax,%bx		// keep token in %bl
    5.45 -	
    5.46 -	andb	$0x70, %al	// isolate literals length in token (LLL)
    5.47 -	je	lzsa1_check_offset_size	// if LLL=0, we have no literals; goto match
    5.48 -#ifdef ONLY8086
    5.49 -	movb	$4, %cl
    5.50 -	shrb	%cl, %al	// shift literals length into place
    5.51 +#define AX	%ax
    5.52 +#define BX	%bx
    5.53 +#define SI	%si
    5.54 +#define DI	%di
    5.55  #else
    5.56 -	shrb	$4, %al		// shift literals length into place
    5.57 +//   input   esi=inStream, edi=outStream
    5.58 +//   output  outStream[], ds:esi, es:edi
    5.59 +	.code32
    5.60 +#define AX	%eax
    5.61 +#define BX	%ebx
    5.62 +#define SI	%esi
    5.63 +#define DI	%edi
    5.64  #endif
    5.65  
    5.66 -	cmpb	$7, %al		// LITERALS_RUN_LEN?
    5.67 -	jne	lzsa1_got_literals	// no, we have the full literals count from the token, go copy
    5.68 +MATCH_RUN_LEN		=	15
    5.69 +LITERALS_RUN_LEN	=	7
    5.70 +MIN_MATCH_SIZE		=	3
    5.71 +MIN_LITERALS_SIZE	=	0
    5.72  
    5.73 -	lodsb                   // grab extra length byte
    5.74 -	addb	$7, %al		// add LITERALS_RUN_LEN
    5.75 -	jnc	lzsa1_got_literals	// if no overflow, we have the full literals count, go copy
    5.76 -	je	lzsa1_big_literals
    5.77 +#define PACKED_ONLY			// assume no copy block, optional
    5.78 +//#define PARANOIA			// cover rare cases, optional
    5.79 + 
    5.80 +.macro	shrclw cnt,obj
    5.81 +#ifdef ONLY8086
    5.82 +	movb	\cnt, %cl
    5.83 +	shrw	%cl, \obj
    5.84 +#else
    5.85 +	shrw	\cnt, \obj
    5.86 +#endif
    5.87 +.endm
    5.88  
    5.89 -	movb	$1, %ah		// add 256 (I'd prefer 'xchgb %al, %ah'	max 1791 instead of 511)
    5.90 -	lodsb			// grab single extra length byte
    5.91 -	.byte	0x3C		// mask lodsw with cmpb $0xAD, %al
    5.92 -				// (*like jmp short lzsa1_got_literals but faster)
    5.93 -
    5.94 -lzsa1_big_literals:
    5.95 -	lodsw			// grab 16-bit extra length
    5.96 -
    5.97 -lzsa1_got_literals:
    5.98 +#ifdef FLAT16OUT
    5.99 +#define RAW_FORMAT
   5.100 +#endif
   5.101 +lzsa1main:
   5.102 +#ifdef PARANOIA
   5.103 +	cld
   5.104 +#endif
   5.105 +#ifndef RAW_FORMAT
   5.106 +# ifndef NO_LZSA1_HEADER
   5.107 +	lodsw
   5.108 +	cmpw	$0x9E7B, %ax	// magic
   5.109 +	jne	lzsa1main
   5.110 +	lodsb
   5.111 +//	cmpb	$0, %al		// lzsa1
   5.112 +//	jne	lzsa1main
   5.113 +# endif
   5.114 +	xorw	%ax, %ax
   5.115 +	xchgw	%ax, %di
   5.116 +	shrclw	$4, %ax
   5.117 +	jmp	lzsa1blockz	// %di *MUST* be paragraph aligned
   5.118 +# ifndef PACKED_ONLY
   5.119 +lzsa1copy:
   5.120 +	movsb			// handle 64K case
   5.121 +	decw	%cx
   5.122 +	rep	movsb		// copy block
   5.123 +# endif
   5.124 +lzsa1block:			// uncompress chunk
   5.125 +	movw	$0x1000, %ax
   5.126 +lzsa1blockz:	
   5.127 +	movw	%es, %bx
   5.128 +	addw	%ax, %bx
   5.129 +	movw	%bx, %es
   5.130 +# ifndef FLAT16
   5.131 +	movw	%si, %ax
   5.132 +	andw	$0xf, %si
   5.133 +	shrclw	$4, %ax
   5.134 +	movw	%ds, %bx
   5.135 +	addw	%ax, %bx
   5.136 +	movw	%bx, %ds
   5.137 +# endif
   5.138 +	lodsw			// block size
   5.139  	xchgw	%ax, %cx
   5.140 -#ifdef USE_MOVSW
   5.141 -	shrw	$1, %cx
   5.142 -	rep movsw
   5.143 -	adcw	%cx, %cx
   5.144 +	movw	%cx, %dx
   5.145 +	lodsb
   5.146 +# ifndef PACKED_ONLY
   5.147 +	orb	%al, %al
   5.148 +	js	lzsa1copy
   5.149 +	jne	lzsa1full	// 64Kb block
   5.150 +# endif
   5.151 +	jcxz	lzsa1quit	// bail if we hit EOD
   5.152 +lzsa1full:
   5.153 +	addw	%si, %dx
   5.154  #endif
   5.155 -	rep movsb		// copy %cx literals from %ds:%si to %es:%di
   5.156 -
   5.157 -lzsa1_check_offset_size:
   5.158 +lzsa1chunk:			// uncompress chunk
   5.159 +	lodsb			// get token O|LLL|MMMM
   5.160 +	movb	%al, %bl	// keep token in bl
   5.161 +	shrclw	$4, %ax		// shift literals length into place
   5.162 +	movw	$LITERALS_RUN_LEN+256*MIN_LITERALS_SIZE, %cx
   5.163 +	call	lzsa1len	// %ch = LITERALS_RUN_LEN
   5.164 +	rep	movsb		// copy %cx literals from %ds:%si to %es:%di
   5.165 +#ifndef RAW_FORMAT
   5.166 +	cmpw	%dx, %si
   5.167 +	je	lzsa1block	// bail if we hit EOD
   5.168 +#endif
   5.169 +#ifdef FLAT32
   5.170 +	orl	$-1, %eax
   5.171 +#endif
   5.172  	testb	%bl, %bl	// check match offset size in token (O bit)
   5.173 -	js	lzsa1_get_long_offset
   5.174 -
   5.175 -	decw	%cx
   5.176 -	xchgw	%ax, %cx	// %ah to 0xff - %cx was zero from the rep movsb above
   5.177 +	js	lzsa1LongOfs
   5.178 +#ifndef FLAT32
   5.179 +	movb	$-1, %ah	// set offset bits 15-8 to 1
   5.180 +#endif
   5.181  	lodsb
   5.182  	.byte	0x3C		// mask lodsw with cmpb $0xAD, %al
   5.183 -				// (*like jmp short lzsa1_get_match_length but faster)
   5.184 -
   5.185 -lzsa1_get_long_offset:
   5.186 -	lodsw			// Get 2-byte match offset
   5.187 -
   5.188 -lzsa1_get_match_length:
   5.189 -	xchgw	%ax, %bx	// %bx: match offset  %ax: original token
   5.190 -	andb	$0xF, %al	// isolate match length in token (MMMM)
   5.191 -	addb	$3, %al		// add MIN_MATCH_SIZE
   5.192 -
   5.193 -	cmpb	$0x12, %al	// MATCH_RUN_LEN?
   5.194 -	jne	lzsa1_got_matchlen	// no, we have the full match length from the token, go copy
   5.195 -
   5.196 -	lodsb			// grab extra length byte
   5.197 -	addb	$0x12, %al	// add MIN_MATCH_SIZE + MATCH_RUN_LEN
   5.198 -	jnc	lzsa1_got_matchlen	// if no overflow, we have the entire length
   5.199 -	je	lzsa1_big_matchlen       
   5.200 -
   5.201 -	movb	$1, %ah		// add 256 (I'd prefer 'xchgb %al, %ah'	max 3071 instead of 511)
   5.202 -	lodsb			// grab single extra length byte
   5.203 -	.byte	0x3C		// mask lodsw with cmpb $0xAD, %al
   5.204 -				// (*like jmp short lzsa1_got_matchlen but faster)
   5.205 -lzsa1_big_matchlen:
   5.206 -	lodsw			// grab 16-bit length
   5.207 -
   5.208 -lzsa1_got_matchlen:
   5.209 -	xchgw	%ax, %cx	// copy match length into %cx
   5.210 -	jcxz	lzsa1_done_decompressing	// bail if we hit EOD
   5.211 -	xchgw	%ax, %si	// save %si (current pointer to compressed data)
   5.212 -	leaw	(%bx,%di), %si	// %es:%si now points at back reference in output data
   5.213 -#ifdef USE_MOVSW
   5.214 -	cmpw	$-2, %bx
   5.215 -	jae	lzsa1_store
   5.216 -	shrw	$1, %cx
   5.217 -	rep movsw %es:(%si), %es:(%di)
   5.218 -	adcw	%cx, %cx
   5.219 -#endif
   5.220 -	rep movsb %es:(%si), %es:(%di)		// copy match
   5.221 -	xchgw	%ax, %si	// restore %ds:%si
   5.222 -	jmp	lzsa1_decode_token	// go decode another token
   5.223 -#ifdef USE_MOVSW
   5.224 -lzsa1_store:
   5.225 -	je	lzsa1_store_word
   5.226 -	lodsb	%es:(%si)
   5.227 -	movb	%al, %ah	
   5.228 -	.byte	0x3D		// mask lodsw with cmpb $0x26AD, %ax
   5.229 -				// (*like jmp short lzsa1_store_byte but faster)
   5.230 -lzsa1_store_word:
   5.231 -	lodsw	%es:(%si)
   5.232 -lzsa1_store_byte:
   5.233 -	shrw	$1, %cx
   5.234 -	rep	stosw
   5.235 -	adcw	%cx, %cx
   5.236 -	rep	stosb
   5.237 -	xchgw	%ax, %si	// restore %ds:%si
   5.238 -	jmp	lzsa1_decode_token	// go decode another token
   5.239 +lzsa1LongOfs:
   5.240 +	lodsw
   5.241 +	xchg	AX, BX		// %bx: match offset  %ax: original token
   5.242 +	movw	$MATCH_RUN_LEN+256*MIN_MATCH_SIZE, %cx
   5.243 +	call	lzsa1len
   5.244 +#ifdef RAW_FORMAT
   5.245 +	jcxz	lzsa1quit	// bail if we hit EOD
   5.246  #endif
   5.247  
   5.248 -lzsa1_done_decompressing:
   5.249 -//	popw	%ax		// retrieve the original decompression offset
   5.250 -//	xchgw	%ax, %di	// compute decompressed size
   5.251 -//	subw	%di, %ax
   5.252 -	ret			// done
   5.253 +#if !defined(FLAT16OUT) && !defined(FLAT32)
   5.254 +	xchg	AX, SI		// save %si	
   5.255 +	lea	(BX,DI), SI
   5.256 +	pushw	%ds
   5.257 +	movw	%es, %bp
   5.258 +	cmpw	%si, %di
   5.259 +	jnc	lzsa1sameSeg
   5.260 +	pushw	%si
   5.261 +# ifdef ONLY8086
   5.262 +	pushw	%cx
   5.263 +# endif
   5.264 +	shrclw	$4, %si
   5.265 +# ifdef ONLY8086
   5.266 +	popw	%cx
   5.267 +# endif
   5.268 +	lea	-4096(%bp,%si), %bp
   5.269 +	popw	%si
   5.270 +	andw	$0xF, %si
   5.271 +lzsa1sameSeg:
   5.272 +	movw	%bp, %ds
   5.273 +# ifdef FASTFILL
   5.274 +	cmp	$-FASTFILL,BX
   5.275 +	jae	lzsa1fast
   5.276 +# endif
   5.277 +	rep movsb
   5.278 +lzsa1chunkz:
   5.279 +	popw	%ds
   5.280 +#else
   5.281 +# ifdef FASTFILL
   5.282 +	cmp	$-FASTFILL,BX
   5.283 +	jae	lzsa1fast
   5.284 +# endif
   5.285 +	xchg	AX, SI		// save %si	
   5.286 +	lea	(BX,DI), SI
   5.287 +# ifdef ONLY8086
   5.288 +lzsa2movsb:
   5.289 +	movsb	%es:(SI), %es:(DI)	//  NMOS 8088/8086 workaround.
   5.290 +	loop	lzsa2movsb
   5.291 +# else
   5.292 +	rep movsb	%es:(SI), %es:(DI)
   5.293 +# endif
   5.294 +#define lzsa1chunkz lzsa1chunk
   5.295 +#endif
   5.296 +	xchg	AX, SI		// restore %si	
   5.297 +	jmp	lzsa1chunk
   5.298 +#ifdef FASTFILL
   5.299 +lzsa1fast:			// reached when BX >= -FASTFILL (unsigned): the match overlaps its own output, so fill %cx bytes instead of copying
   5.300 +# if FASTFILL == 1
   5.301 +#  if !defined(FLAT16OUT) && !defined(FLAT32)
   5.302 +	lodsb			// byte to repeat, from %ds:%si set up by the caller. NOTE(review): this overwrites %al while AX still holds the saved %si that is restored after lzsa1chunkz - confirm
   5.303 +#  else
   5.304 +	movb	%es:(BX,DI), %al	// byte to repeat, taken from the output at offset BX
   5.305 +#  endif
   5.306 +	rep stosb		// store %al %cx times at %es:%di
   5.307 +# endif
   5.308 +# if FASTFILL == 2
   5.309 +#  if !defined(FLAT16OUT) && !defined(FLAT32)
   5.310 +	lodsw			// pattern word (flags untouched). NOTE(review): overwrites AX, which still holds the saved %si - confirm
   5.311 +#  else
   5.312 +	movw	%es:(BX,DI), %ax	// pattern word at offset BX (flags untouched)
   5.313 +#  endif
   5.314 +	je	lzsa1fastword	// flags still come from cmp $-FASTFILL,BX: offset == -2, keep the two byte pattern
   5.315 +	movb	%al, %ah	// offset == -1: %al is the previous output byte, %ah is not written yet, so duplicate %al
   5.316 +lzsa1fastword:
   5.317 +	shr	$1, CX		// word count, carry = odd trailing byte
   5.318 +	rep stosw		// rep stosw leaves the carry from shr intact
   5.319 +	jnc	lzsa1chunkz	// even length: done
   5.320 +	stosb			// odd length: one more byte of the pattern
   5.321 +# endif
   5.322 +	jmp	lzsa1chunkz
   5.323 +#endif
   5.324 +
   5.325 +lzsa1len:			// decode a length into %cx. in: %al = token bits, %cl = field mask, %ch = value added to the field; may read extra bytes at %ds:%si
   5.326 +	andb	%cl, %al	// keep only the bits selected by the mask
   5.327 +	cbw			// clear %ah
   5.328 +	cmpb	%al, %cl	// field saturated (equal to the mask)?
   5.329 +	jne	lzsa1minNumber	// S=0-6, L=0-14		%cx = %ch + %al   if (%al & %cl != %cl)
   5.330 +	addb	%al, %ch	// saturated: fold the mask value into the base
   5.331 +	lodsb			// extra length byte
   5.332 +lzsa1minNumber:
   5.333 +	addb	%ch, %al	// add the base; carry set when the sum reaches 0x100, ZF set when it is exactly 0x100
   5.334 +	jnc	lzsa1gotNumber  // 0-255			%cx = %ch + %cl + byte   if (%al & %cl == %cl && %ch + %cl + byte < 0x100)
   5.335 +	movb	%al, %ah	// S=256-1791, L=256-4607 or S=256-511, L=256-511; low byte of the sum becomes the high byte, mov keeps the flags of the addb
   5.336 +	jne	lzsa1midNumber	// sum != 0x100: one more byte supplies the low half
   5.337 +	lodsw			// 0-65535			%cx = word   if (%al & %cl == %cl && %ch + %cl + byte == 0x100)
   5.338 +	.byte	0x3C		// mask lodsb with cmpb $0xAC, %al (the next opcode byte is consumed as the immediate)
   5.339 +lzsa1midNumber:
   5.340 +	lodsb			//				%cx = ((%ch + %cl + byte) & 0xFF)*256 + byte2   if (%al & %cl == %cl && %ch + %cl + byte > 0x100)
   5.341 +lzsa1gotNumber:
   5.342 +	xchgw	%ax, %cx	// result to %cx; the old %cx (mask and base) ends up in %ax
   5.343 +lzsa1quit:
   5.344 +	ret
     6.1 --- a/linux/stuff/unlzsa2.S	Sun Aug 13 19:51:00 2023 +0000
     6.2 +++ b/linux/stuff/unlzsa2.S	Tue Aug 15 09:20:55 2023 +0000
     6.3 @@ -1,177 +1,279 @@
     6.4 -//  based on
     6.5 -//  decompress_small.S - space-efficient decompressor implementation for 8088
     6.6 -//
     6.7 -//  Copyright (C) 2019 Emmanuel Marty
     6.8 -//
     6.9 -//  This software is provided 'as-is', without any express or implied
    6.10 -//  warranty.  In no event will the authors be held liable for any damages
    6.11 -//  arising from the use of this software.
    6.12 -//
    6.13 -//  Permission is granted to anyone to use this software for any purpose,
    6.14 -//  including commercial applications, and to alter it and redistribute it
    6.15 -//  freely, subject to the following restrictions:
    6.16 -//
    6.17 -//  1. The origin of this software must not be misrepresented; you must not
    6.18 -//     claim that you wrote the original software. If you use this software
    6.19 -//     in a product, an acknowledgment in the product documentation would be
    6.20 -//     appreciated but is not required.
    6.21 -//  2. Altered source versions must be plainly marked as such, and must not be
    6.22 -//     misrepresented as being the original software.
    6.23 -//  3. This notice may not be removed or altered from any source distribution.
    6.24 -
    6.25 -//  ---------------------------------------------------------------------------
    6.26 -//  Decompress raw LZSA2 block
    6.27 -//  inputs:
    6.28 -//  * %ds:%si: raw LZSA2 block
    6.29 -//  * %es:%di: output buffer
    6.30 -//  ---------------------------------------------------------------------------
    6.31 -
    6.32 +// Lzsa2Decode:
    6.33 +#ifndef FLAT32
    6.34 +//   input   ds:si=inStream, es:di=outStream
    6.35 +//   output  outStream[], ds:si, es:di
    6.36  	.code16
    6.37 -lzsa2_decompress:
    6.38 -	//pushw	%di		// remember decompression offset
    6.39 -	//cld			// make string operations (lods, movs, stos..) move forward
    6.40 -
    6.41 -	xorw	%cx, %cx
    6.42 -	movw	$0x100, %bx
    6.43 -	xorw	%bp, %bp
    6.44 -
    6.45 -lzsa2_decode_token:
    6.46 -	movw	%cx, %ax	// clear %ah - %cx is zero from above or from after rep movsb in lzsa2_copy_match
    6.47 -	lodsb			// read token byte: XYZ|LL|MMMM
    6.48 -	movw	%ax,%dx		// keep token in %dl
    6.49 -	
    6.50 -	andb	$0x18, %al	// isolate literals length in token (LL)
    6.51 -#ifdef ONLY8086
    6.52 -	movb	$3, %cl
    6.53 -	shrb	%cl, %al	// shift literals length into place
    6.54 +#define AX	%ax
    6.55 +#define BX	%bx
    6.56 +#define SI	%si
    6.57 +#define DI	%di
    6.58  #else
    6.59 -	shrb	$3, %al		// shift literals length into place
    6.60 +//   input   esi=inStream, edi=outStream
    6.61 +//   output  outStream[], ds:esi, es:edi
    6.62 +	.code32
    6.63 +#define AX	%eax
    6.64 +#define BX	%ebx
    6.65 +#define SI	%esi
    6.66 +#define DI	%edi
    6.67  #endif
    6.68  
    6.69 -	cmpb	$3, %al		// LITERALS_RUN_LEN_V2?
    6.70 -	jne	lzsa2_got_literals	// no, we have the full literals count from the token, go copy
    6.71 +MATCH_RUN_LEN		=	7
    6.72 +LITERALS_RUN_LEN	=	3
    6.73 +MIN_MATCH_SIZE		=	2
    6.74 +MIN_LITERALS_SIZE	=	0
    6.75  
    6.76 -	call	lzsa2_get_nibble	// get extra literals length nibble
    6.77 -	addb	%cl, %al	// add len from token to nibble 
    6.78 -	cmpb	$0x12, %al      // LITERALS_RUN_LEN_V2 + 15 ?
    6.79 -	jne	lzsa2_got_literals	// if not, we have the full literals count, go copy
    6.80 -	lodsb                   // grab extra length byte
    6.81 -	addb	$0x12, %al	// overflow?
    6.82 -	jnc	lzsa2_got_literals	// if not, we have the full literals count, go copy
    6.83 +#define PACKED_ONLY			// assume no copy block, optional
    6.84 +//#define PARANOIA			// cover rare cases, optional
    6.85 + 
    6.86 +.macro	shrcl cnt,obj
    6.87 +#ifdef ONLY8086
    6.88 +	movb	\cnt, %cl
    6.89 +	shr	%cl, \obj
    6.90 +#else
    6.91 +	shr	\cnt, \obj
    6.92 +#endif
    6.93 +.endm
    6.94  
    6.95 -	lodsw			// grab 16-bit extra length
    6.96 +#ifdef FLAT16OUT
    6.97 +#define RAW_FORMAT
    6.98 +#endif
    6.99 +lzsa2main:			// entry point: optional stream header, then per block frame setup, then falls into lzsa2chunk
   6.100 +#ifdef PARANOIA
   6.101 +	cld
   6.102 +#endif
   6.103 +#ifndef RAW_FORMAT
   6.104 +# ifndef NO_LZSA2_HEADER
   6.105 +	lodsw
   6.106 +	cmpw	$0x9E7B, %ax	// magic
   6.107 +	jne	lzsa2main	// no magic: retry with the next word of input
   6.108 +	lodsb			// skip the byte following the magic (the format check below is disabled)
   6.109 +//	testb	$0x20, %al	// lzsa2
   6.110 +//	je	lzsa2main
   6.111 +# endif
   6.112 +	xorw	%ax, %ax
   6.113 +	cwd			// no nibble stored (%dx = 0, so %dh = 0)
   6.114 +	xchgw	%ax, %di	// %ax = output offset, %di = 0
   6.115 +	shrcl	$4, %ax		// offset in paragraphs; the low 4 bits are dropped
   6.116 +	jmp	lzsa2blockz	// %di *MUST* be paragraph aligned
   6.117 +# ifndef PACKED_ONLY
   6.118 +lzsa2copy:
   6.119 +	movsb			// handle 64K case (%cx == 0 then wraps to 0xFFFF below)
   6.120 +	decw	%cx
   6.121 +	rep	movsb		// copy block
   6.122 +# endif
   6.123 +lzsa2block:			// uncompress chunk
   6.124 +	movw	$0x1000, %ax	// advance %es by 0x1000 paragraphs (64 KiB)
   6.125 +lzsa2blockz:			// %es += %ax
   6.126 +	movw	%es, %bx
   6.127 +	addw	%ax, %bx
   6.128 +	movw	%bx, %es
   6.129 +# ifndef FLAT16
   6.130 +	movw	%si, %ax	// normalize %ds:%si so that %si is 0-15
   6.131 +	andw	$0xf, %si
   6.132 +	shrcl	$4, %ax
   6.133 +	movw	%ds, %bx
   6.134 +	addw	%ax, %bx
   6.135 +	movw	%bx, %ds
   6.136 +# endif
   6.137 +	lodsw			// block size
   6.138 +	xchgw	%ax, %cx
   6.139 +	movw	%cx, %bp
   6.140 +	lodsb			// third byte of the block header
   6.141 +# ifndef PACKED_ONLY
   6.142 +	orb	%al, %al
   6.143 +	js	lzsa2copy	// bit 7 set: block is copied verbatim by lzsa2copy
   6.144 +	jne	lzsa2full	// 64Kb block
   6.145 +# endif
   6.146 +	jcxz	lzsa2quit	// bail if we hit EOD
   6.147 +lzsa2full:
   6.148 +	addw	%si, %bp	// %bp = value of %si at the end of this block
   6.149 +#else
   6.150 +	movb	$0, %dh		// no nibble stored
   6.151 +#endif
   6.152 +#ifdef FLAT32
   6.153 +	orl	$-1, %ebx	// set offset bits 31-16 to 1
   6.154 +#endif
   6.155 +lzsa2chunk:			// uncompress chunk: one token = literal run followed by a match
   6.156 +	lodsb			// get token XYZ|LL|MMM
   6.157 +	pushw	%ax		// keep token
   6.158 +	movw	$LITERALS_RUN_LEN+256*MIN_LITERALS_SIZE, %cx	// %cl = mask and shift count (3), %ch = minimum size (0)
   6.159 +	shrb	%cl, %al	// shift literals length into place
   6.160 +	call	lzsa2len	// %cl = LITERALS_RUN_LEN
   6.161 +	rep	movsb		// copy %cx literals from %ds:%si to %es:%di
   6.162 +	popw	%ax		// restore token
   6.163 +#ifndef RAW_FORMAT
   6.164 +	cmpw	%bp, %si	// %bp = end of the current block (set in lzsa2full)
   6.165 +	je	lzsa2block	// end of this block: go read the next block header
   6.166 +#endif
   6.167 +	pushw	%ax		// save token
   6.168 +// Decode XYZ bits to SZC flags
   6.169 +	andb	$0xE0, %al	// filter XYZ
   6.170 +	movb	%al, %ah	// %ah = XYZ00000
   6.171 +	orb	$0xDF, %al	// %al = 11Z11111
   6.172 +	incw	%ax		// Z=1: %al wraps to 0 and carries into bit 0 of %ah
   6.173 +	sahf			// ah=SZ.A.P.C, so SF=X, ZF=Y, CF=Z
   6.174 +	je	no_nibble_in_offset	// Y=1
   6.175  
   6.176 -lzsa2_got_literals:
   6.177 -	xchgw	%ax, %cx
   6.178 -	rep movsb		// copy %cx literals from %ds:%si to %es:%di
   6.179 +	call	getNibble       // get nibble for offset bits 0-3, kill %cl
   6.180 +	sahf
   6.181 +	rclb	$1, %al
   6.182 +	xorb	$0xE1, %al	// set offset bits 7-5 to 1
   6.183 +	sahf
   6.184 +	cbw			// set offset bits 15-8 to 1
   6.185 +// 00Z 5-bit offset: read a nibble for offset bits 1-4 and use the inverted bit Z of the token as bit 0 of the offset.
   6.186 +//                   set bits 5-15 of the offset to 1.
   6.187 +	jns	get_match_length	// 5-bit offset
   6.188 +//10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset,
   6.189 +//                   then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1.
   6.190 +//                   subtract 512 from the offset to get the final value.
   6.191 +	subb	$2, %al		// substract 512, clear C
   6.192 +	jmp	get_match_length_1
   6.193  
   6.194 -	testb	$0xC0, %dl	// check match offset size in token (X bit)
   6.195 -	js	lzsa2_rep_match_or_large_offset
   6.196 -
   6.197 -	//cmpb	$0x40, %dl	// check if this is a 5 or 9-bit offset (Y bit)
   6.198 -				// discovered via the test with bit 6 set
   6.199 -	xchgw	%ax, %cx	// clear %ah - %cx was zero from the rep movsb above
   6.200 -	jne	lzsa2_offset_9_bit
   6.201 -
   6.202 -				// 5 bit offset
   6.203 -	cmpb	$0x20, %dl	// test bit 5
   6.204 -	call	lzsa2_get_nibble_x
   6.205 -	jmp	lzsa2_dec_offset_top
   6.206 -
   6.207 -lzsa2_offset_9_bit:		// 9 bit offset
   6.208 -	lodsb			// get 8 bit offset from stream in A
   6.209 -	decb	%ah		// set offset bits 15-8 to 1
   6.210 -	testb	$0x20, %dl	// test bit Z (offset bit 8)
   6.211 -	je	lzsa2_get_match_length
   6.212 -lzsa2_dec_offset_top:
   6.213 -	decb	%ah		// clear bit 8 if Z bit is clear
   6.214 -				// or set offset bits 15-8 to 1
   6.215 -	jmp	lzsa2_get_match_length
   6.216 -
   6.217 -lzsa2_rep_match_or_large_offset:
   6.218 -	//cmpb	$0xC0, %dl	// check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
   6.219 -	jpe	lzsa2_rep_match_or_16_bit
   6.220 -
   6.221 -				// 13 bit offset
   6.222 -
   6.223 -	cmpb	$0xA0, %dl	// test bit 5 (knowing that bit 7 is also set)
   6.224 -	xchgb	%al, %ah
   6.225 -	call	lzsa2_get_nibble_x
   6.226 -	subb	$2, %al		// substract 512
   6.227 -	jmp	lzsa2_get_match_length_1
   6.228 -
   6.229 -lzsa2_rep_match_or_16_bit:
   6.230 -	testb	$0x20, %dl	// test bit Z (offset bit 8)
   6.231 -	jne	lzsa2_repeat_match	// rep-match
   6.232 -
   6.233 -				// 16 bit offset
   6.234 -	lodsb			// Get 2-byte match offset
   6.235 -
   6.236 -lzsa2_get_match_length_1:
   6.237 -	xchgb	%al, %ah
   6.238 -	lodsb			// load match offset bits 0-7
   6.239 -
   6.240 -lzsa2_get_match_length:
   6.241 -	xchgw	%ax, %bp	// %bp: offset
   6.242 -lzsa2_repeat_match:
   6.243 -	xchgw	%ax, %dx	// %ax: original token
   6.244 -	andb	$7, %al		// isolate match length in token (MMM)
   6.245 -	addb	$2, %al		// add MIN_MATCH_SIZE_V2
   6.246 -
   6.247 -	cmpb	$9, %al		// MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
   6.248 -	jne	lzsa2_got_matchlen	// no, we have the full match length from the token, go copy
   6.249 -
   6.250 -	call	lzsa2_get_nibble	// get extra literals length nibble
   6.251 -	addb	%cl, %al	// add len from token to nibble 
   6.252 -	cmpb	$0x18, %al	// MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
   6.253 -	jne	lzsa2_got_matchlen	// no, we have the full match length from the token, go copy
   6.254 -
   6.255 -	lodsb			// grab extra length byte
   6.256 -	addb	$0x18, %al	// overflow?
   6.257 -	jnc	lzsa2_got_matchlen	// if not, we have the entire length
   6.258 -	je	lzsa2_done_decompressing	// detect EOD code
   6.259 -
   6.260 -	lodsw			// grab 16-bit length
   6.261 -
   6.262 -lzsa2_got_matchlen:
   6.263 -	xchgw	%ax, %cx	// copy match length into %cx
   6.264 -	xchgw	%ax, %si	// save %si (current pointer to compressed data)
   6.265 -	leaw	(%bx,%di), %si	// %es:%si now points at back reference in output data
   6.266 -	rep movsb %es:(%si), %es:(%di)		// copy match
   6.267 -	xchgw	%ax, %si	// restore %si
   6.268 -	jmp	lzsa2_decode_token	// go decode another token
   6.269 -
   6.270 -//lzsa2_done_decompressing:
   6.271 -//	popw	%ax		// retrieve the original decompression offset
   6.272 -//	xchgw	%ax, %di	// compute decompressed size
   6.273 -//	subw	%di, %ax
   6.274 -//	ret			// done
   6.275 -
   6.276 -lzsa2_get_nibble_x:
   6.277 -	cmc			// carry set if bit 4 was set
   6.278 -	rcrb	$1, %al
   6.279 -	call	lzsa2_get_nibble	// get nibble for offset bits 0-3
   6.280 -	orb	%cl, %al	// merge nibble
   6.281 -	rolb	$1, %al
   6.282 -	xorb	$0xE1, %al	// set offset bits 7-5 to 1
   6.283 -lzsa2_done_decompressing:
   6.284 +getNibble:			// return the next 4-bit value in %al; %dh keeps the state (high nibble = flag, low nibble = pending value)
   6.285 +	xorb	$0xF0, %dh	// toggle nibble stored flags
   6.286 +	movb	%dh, %al	// flag was set: %al = pending low nibble (mov keeps the flags of the xorb)
   6.287 +	jns	gotnibble	// sign clear after the toggle: a nibble was pending, return it
   6.288 +	lodsb			// nothing pending: fetch a byte holding two nibbles
   6.289 +	movb	$0xF0, %dh
   6.290 +	orb	%al, %dh	// remember the byte with the flag bits set; its low nibble is served by the next call
   6.291 +	shrcl	$4, %al		// return the high nibble first (with ONLY8086 the macro overwrites %cl)
   6.292 +gotnibble:
   6.293 +lzsa2quit:			// shared ret, also the exit target of jcxz / je lzsa2quit
   6.294 +	ret
   6.295  
   6.296 -lzsa2_get_nibble:
   6.297 -	negb	%bh		// nibble ready?
   6.298 -	jns	lzsa2_has_nibble
   6.299 -	
   6.300 -	xchgw	%ax, %bx
   6.301 -	lodsb			// load two nibbles
   6.302 -	xchgw	%ax, %bx
   6.303 +no_nibble_in_offset:
   6.304 +	movb	$-1, %ah	// set offset bits 15-8 to 1
   6.305 +	jns	offset_9_bit	// X=0 Y=1
   6.306 +	jc	repeat_match	// rep-match X=1 Y=1 Z=1
   6.307 +//110 16-bit offset: read a byte for offset bits 8-15, then another byte for offset bits 0-7.
   6.308 +	lodsb			// Get 2-byte match offset
   6.309 +get_match_length_1:
   6.310 +	xchgb	%al, %ah
   6.311  
   6.312 -lzsa2_has_nibble:
   6.313 -#ifdef ONLY8086
   6.314 -	movb	$4, %cl		// swap 4 high and low bits of nibble
   6.315 -	rorb	%cl, %bl
   6.316 +offset_9_bit:	// Z=C
   6.317 +// 01Z 9-bit offset: read a byte for offset bits 0-7 and use the inverted bit Z for bit 8 of the offset.
   6.318 +//                   set bits 9-15 of the offset to 1.
   6.319 +	sbbb	%ch, %ah	// clear bit 8 if Z bit is clear (%ch == 0)
   6.320 +	lodsb			// load match offset bits 0-7
   6.321 +get_match_length:
   6.322 +	xchgw	%ax, %bx	// bx: offset
   6.323 +repeat_match:
   6.324 +//111 repeat offset: reuse the offset value of the previous match command.
   6.325 +
   6.326 +	popw	%ax		// restore original token in %al
   6.327 +	movw	$MATCH_RUN_LEN+256*MIN_MATCH_SIZE, %cx
   6.328 +	call	lzsa2len
   6.329 +#if !defined(ENHANCED_FORMAT) && defined(RAW_FORMAT)
   6.330 +	je	lzsa2quit	// bail if we hit EOD in (%si-2)
   6.331 +#endif
   6.332 +
   6.333 +#if !defined(FLAT16OUT) && !defined(FLAT32)
   6.334 +	xchg	AX, SI		// save %si	
   6.335 +	lea	(BX,DI), SI
   6.336 +	pushw	%ds
   6.337 +# ifndef RAW_FORMAT
   6.338 +	pushw	%bp		// save end
   6.339 +# endif
   6.340 +	movw	%es, %bp
   6.341 +	cmpw	%si, %di
   6.342 +	jnc	lzsa2sameSeg
   6.343 +	pushw	%si
   6.344 +# ifdef ONLY8086
   6.345 +	pushw	%cx
   6.346 +# endif
   6.347 +	shrcl	$4, %si
   6.348 +# ifdef ONLY8086
   6.349 +	popw	%cx
   6.350 +# endif
   6.351 +	lea	-4096(%bp,%si), %bp
   6.352 +	popw	%si
   6.353 +	andw	$0xF, %si
   6.354 +lzsa2sameSeg:
   6.355 +	movw	%bp, %ds
   6.356 +# ifdef FASTFILL
   6.357 +	cmp	$-FASTFILL,BX
   6.358 +	jae	lzsa2fast
   6.359 +# endif
   6.360 +	rep movsb
   6.361 +lzsa2chunkz:
   6.362 +# ifndef RAW_FORMAT
   6.363 +	popw	%bp		// restore end
   6.364 +# endif
   6.365 +	popw	%ds
   6.366  #else
   6.367 -	rorb	$4, %bl
   6.368 +# ifdef FASTFILL
   6.369 +	cmp	$-FASTFILL,BX
   6.370 +	jae	lzsa2fast
   6.371 +# endif
   6.372 +	xchg	AX, SI		// save %si	
   6.373 +	lea	(BX,DI), SI
   6.374 +# ifdef ONLY8086
   6.375 +lzsa2movsb:
   6.376 +	movsb	%es:(SI), %es:(DI)	//  NMOS 8088/8086 workaround.
   6.377 +	loop	lzsa2movsb
   6.378 +# else
   6.379 +	rep movsb	%es:(SI), %es:(DI)
   6.380 +# endif
   6.381 +#define lzsa2chunkz lzsa2chunk
   6.382  #endif
   6.383 -	movb	$0xF, %cl
   6.384 -	andb	%bl, %cl
   6.385 +	xchg	AX, SI		// restore %si	
   6.386 +	jmp	lzsa2chunk
   6.387 +#ifdef FASTFILL
   6.388 +lzsa2fast:			// reached when BX >= -FASTFILL (unsigned): the match overlaps its own output, so fill %cx bytes instead of copying
   6.389 +# if FASTFILL == 1
   6.390 +#  if !defined(FLAT16OUT) && !defined(FLAT32)
   6.391 +	lodsb			// byte to repeat, from %ds:%si set up by the caller. NOTE(review): this overwrites %al while AX still holds the saved %si that is restored after lzsa2chunkz - confirm
   6.392 +#  else
   6.393 +	movb	%es:(BX,DI), %al	// byte to repeat, taken from the output at offset BX
   6.394 +#  endif
   6.395 +	rep stosb		// store %al %cx times at %es:%di
   6.396 +# endif
   6.397 +# if FASTFILL == 2
   6.398 +#  if !defined(FLAT16OUT) && !defined(FLAT32)
   6.399 +	lodsw			// pattern word (flags untouched). NOTE(review): overwrites AX, which still holds the saved %si - confirm
   6.400 +#  else
   6.401 +	movw	%es:(BX,DI), %ax	// pattern word at offset BX (flags untouched)
   6.402 +#  endif
   6.403 +	je	lzsa2fastword	// flags still come from cmp $-FASTFILL,BX: offset == -2, keep the two byte pattern
   6.404 +	movb	%al, %ah	// offset == -1: %al is the previous output byte, %ah is not written yet, so duplicate %al
   6.405 +lzsa2fastword:
   6.406 +	shr	$1, %cx		// word count, carry = odd trailing byte (this file defines no CX macro, the length is built in %cx by lzsa2len)
   6.407 +	rep stosw		// rep stosw leaves the carry from shr intact
   6.408 +	jnc	lzsa2chunkz	// even length: done
   6.409 +	stosb			// odd length: one more byte of the pattern
   6.410 +# endif
   6.411 +	jmp	lzsa2chunkz
   6.412 +#endif
   6.413 +
   6.414 +lzsa2len:			// get length in %cx. in: %al = token bits, %cl = field mask, %ch = minimum size; may consume a nibble and bytes from the input
   6.415 +	andb	%cl, %al	// keep only the bits selected by the mask
   6.416 +	cbw			// clear %ah
   6.417 +	cmpb	%al, %cl	// field saturated (equal to the mask)?
   6.418 +	jne	lzsa2minNumber	// S=0-2, L=0-6			%cx = %ch + %al   if (%al & %cl != %cl)
   6.419 +	addb	%al, %ch	// saturated: fold the mask value into the base
   6.420 +	call	getNibble	// kill %cl
   6.421 +	cmp	$0xF, %al	// nibble saturated too?
   6.422 +	jne	lzsa2minNumber	//				%cx = %ch + %cl + nibble   if (%al & %cl == %cl && nibble != 0xF)
   6.423 +	addb	%al, %ch	// fold the 0xF into the base
   6.424 +	lodsb			// extra length byte
   6.425 +lzsa2minNumber:
   6.426 +	addb	%ch, %al	// add the base; carry set when the sum reaches 0x100, ZF set when it is exactly 0x100
   6.427 +	jnc	lzsa2gotNumber  // 0-255			%cx = %ch + %cl + 0xF + byte   if (%al & %cl == %cl && nibble == 0xF && %ch + %cl + 0xF + byte < 0x100)
   6.428 +#ifdef ENHANCED_FORMAT
   6.429 +	decw	%ax		// %ax = (sum & 0xFF) - 1
   6.430 +	je	lzsa2maxNumber	// sum == 0x101: a 16-bit length follows
   6.431 +# ifdef RAW_FORMAT
   6.432 +	jns	lzsa2midNumber
   6.433 +	popw	%ax		// sum == 0x100: drop our own return address so that ret leaves the caller as well
   6.434 +	ret			// bail if we hit EOD
   6.435 +lzsa2midNumber:
   6.436 +# endif
   6.437 +	xchgb	%al, %ah	// S=256-4351, L=256-5887
   6.438 +	lodsb			//				%cx = (%ch + %cl + 0xE + byte)*256 + byte2   if (%al & %cl == %cl && nibble == 0xF && %ch + %cl + 0xF + byte > 0x100)
   6.439 +	.byte	0xB1		// mask lodsw with movb $0xAD, %cl (the next opcode byte is consumed as the immediate)
   6.440 +lzsa2maxNumber:
   6.441 +#endif
   6.442 +	lodsw			// 0-65535			%cx = word   if (%al & %cl == %cl && nibble == 0xF && %ch + %cl + 0xF + byte == 0x100)
   6.443 +lzsa2gotNumber:
   6.444 +	xchgw	%ax, %cx	// result to %cx; flags are left as set by the addb above
   6.445 +	ret
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux/stuff/unpack.S	Tue Aug 15 09:20:55 2023 +0000
     7.3 @@ -0,0 +1,62 @@
     7.4 +// cc -DTOP=topmem [-DSAVEREGS] [-DHEADER=0x38|-DTARGET=[0|0x1F2|0x200]] -o unpack.o -Wa,-a=unpack.lst -c unpack.S
     7.5 +// objcopy --only-section=.text -O binary unpack.o unpack.bin
     7.6 +// Relocation stub: moves the code between .moved and .end plus PACKSZ following bytes up to just below TOP, then jumps into the moved copy.
     7.7 +	.code16
     7.8 +
     7.9 +.zero:				// offset 0 of this object; used in the jump target arithmetic below
    7.10 +#ifdef HEADER
    7.11 +	.org	HEADER
    7.12 +# define TARGET 0x200+HEADER
    7.13 +#else
    7.14 +# ifndef TARGET
    7.15 +# define TARGET 0x200
    7.16 +# endif
    7.17 +#endif
    7.18 +// PACKSZ is 0 in this template; presumably replaced with the packed payload size by the pack script - TODO confirm
    7.19 +#define PACKSZ 0
    7.20 +
    7.21 +.start:
    7.22 +#ifdef SAVEREGS
    7.23 +	pushaw			// registers are restored by popaw after the call at .endzip
    7.24 +#else
    7.25 +	std			// copy downwards, from the last byte to the first
    7.26 +#endif
    7.27 +	movw	$PACKSZ+.end-.moved, %cx	// byte count: relocated code plus payload
    7.28 +	movw	$0x200+PACKSZ+.end-1, %si	// last source byte; assumes this object sits at offset 0x200 of %ds - TODO confirm
    7.29 +	movw	$TOP-1, %di	// last destination byte, just below TOP
    7.30 +	pushw	%ds
    7.31 +	popw	%es		// %es = %ds
    7.32 +#ifdef SAVEREGS
    7.33 +	std			// copy downwards (set here so that pushaw saved the original flags-independent registers first)
    7.34 +#endif
    7.35 +	rep	movsb
    7.36 +	cld			// forward string operations from here on
    7.37 +	leaw	1(%di), %si	// %di stopped one byte below the moved area, so %si = its first byte
    7.38 +	movw	$TARGET, %di	// %di = TARGET, the offset selected by -DTARGET or -DHEADER
    7.39 +#ifdef SAVEREGS
    7.40 +//	jmp	TOP-0x200+.endzip-.end
    7.41 +	jmp	TOP-0x200+.endzip-.end+.zero-.moved+2 // ??? NOTE(review): the .zero-.moved+2 correction is not explained in the source
    7.42 +#else
    7.43 +	pushw	%cx		// %cx is 0 after rep movsb: pushes 0, presumably the address the final ret of the unpacker jumps to (the SAVEREGS variant does pushw $0 / ret) - TODO confirm
    7.44 +//	jmp	TOP-0x200+.unzip-.end
    7.45 +	jmp	TOP-0x200+.unzip-.end+.zero-.moved+2 // ??? NOTE(review): the .zero-.moved+2 correction is not explained in the source
    7.46 +#endif
    7.47 +
    7.48 +// ==============================================================================
    7.49 +
    7.50 +#define FLAT16
    7.51 +#define FLAT16OUT
    7.52 +#define RAW_FORMAT
    7.53 +#define NO_LZ4_HEADER
    7.54 +#define NO_LZMA_HEADER
    7.55 +
    7.56 +.moved:				// start of the relocated area
    7.57 +.unzip:				// the decompressor presumably gets inserted here by the build - TODO confirm
    7.58 +.endzip:
    7.59 +#ifdef SAVEREGS
    7.60 +	call	.unzip
    7.61 +	popaw			// restore the registers saved at .start
    7.62 +	pushw	$0
    7.63 +	ret			// near jump to offset 0
    7.64 +#endif
    7.65 +.end:
     8.1 --- a/linux/stuff/unpacklz4.S	Sun Aug 13 19:51:00 2023 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,157 +0,0 @@
     8.4 -// objdump -D -b binary -mi386 -Maddr16,data16 mbr
     8.5 -
     8.6 -	.code16
     8.7 -	.org	0
     8.8 -
     8.9 -// LZ4 LZSA1 LZSA2 or LZMA
    8.10 -#define COMPRESSION_LZ4
    8.11 -#define HOLE		0x4000
    8.12 -#define DS_OFS		0x0200
    8.13 -
    8.14 -start:
    8.15 -	cld
    8.16 -#define packed_moved	HOLE+0x200+packed
    8.17 -	movw	$packed_moved-next, %di
    8.18 -	xorw	%si, %si
    8.19 -	movw	%di, %cx
    8.20 -	pushw	%ds
    8.21 -	popw	%es
    8.22 -	rep movsb %ds:(%si), %es:(%di)	// move setup
    8.23 -	pushw	%cs			// CS
    8.24 -	pushw	%cx			// IP = 0
    8.25 -	//pushaw
    8.26 -	call	packed_moved
    8.27 -next:
    8.28 -	popw	%bx
    8.29 -	leaw	DS_OFS+start-next(%bx), %di
    8.30 -	leaw	DS_OFS+packed-next(%bx,%si), %si
    8.31 -#if  defined(COMPRESSION_LZMA)
    8.32 -	call	unlzma
    8.33 -#elif defined(COMPRESSION_LZSA2)
    8.34 -	call	unlzsa2
    8.35 -#elif defined(COMPRESSION_LZSA1)
    8.36 -	call	unlzsa1
    8.37 -#else
    8.38 -
    8.39 -#define ARCHIVE_MAGICNUMBER	0x184C2102
    8.40 -#define NEGATIVE_OFFSETS	0	// non standard hack
    8.41 -#define LENGTH_16BITS		0	// non standard hack
    8.42 -#define BYTE_RLE		0
    8.43 -
    8.44 -lz4main:
    8.45 -	lodsl				// get chunkSize
    8.46 -	//cmpl	$ARCHIVE_MAGICNUMBER, %eax
    8.47 -	//je	lz4main
    8.48 -	//lodsw
    8.49 -	addw	%si, %ax
    8.50 -	xchgw	%ax, %dx
    8.51 -	//lodsw
    8.52 -	jmp	lz4chunk		// %cx = 0
    8.53 -
    8.54 -lz4len:					// get length in %cx
    8.55 -	andw	$0xF, %ax
    8.56 -	cmpb	$0xF, %al
    8.57 -	xchgw	%ax, %cx
    8.58 -	jne	lz4quit
    8.59 -lz4len2:
    8.60 -	lodsb
    8.61 -	addw	%ax, %cx
    8.62 -	cmpb	$0xFF, %al
    8.63 -#if LENGTH_16BITS
    8.64 -	jne	lz4quit
    8.65 -	lodsw
    8.66 -	addw	%ax, %cx
    8.67 -#else
    8.68 -	je	lz4len2
    8.69 -#endif
    8.70 -lz4quit:
    8.71 -	ret
    8.72 -
    8.73 -#if BYTE_RLE
    8.74 -	movb	-1(%di), %al
    8.75 -	rep stosb %es:(%di)		// fill string
    8.76 -	jmp	lz4chunk
    8.77 -#endif
    8.78 -
    8.79 -lz4string:
    8.80 -	lodsw				// get string offset
    8.81 -	xchgw	%ax, %bx
    8.82 -	call	lz4len			// get string length
    8.83 -	add	$4, %cx			// minimum match is 4
    8.84 -#if BYTE_RLE
    8.85 -	decw	%bx
    8.86 -	jz	lz4rle
    8.87 -# if NEGATIVE_OFFSETS
    8.88 -	incw	%bx
    8.89 -# else
    8.90 -	notw	%bx
    8.91 -# endif
    8.92 -#endif
    8.93 -	pushw	%si
    8.94 -#if NEGATIVE_OFFSETS || BYTE_RLE
    8.95 -	leaw	(%bx,%di), %si
    8.96 -#else
    8.97 -	movw	%di, %si
    8.98 -	subw	%bx, %si
    8.99 -#endif
   8.100 -#ifdef USE_MOVSW
   8.101 -# if NEGATIVE_OFFSETS || BYTE_RLE
   8.102 -	cmpw	$-2, %bx
   8.103 -	jb	lzs4_move_words
   8.104 -# else
   8.105 -	cmpw	$2, %bx
   8.106 -	ja	lzs4_move_words
   8.107 -# endif
   8.108 -	lodsw	%es:(%si)
   8.109 -	je	lzs4_store_word	
   8.110 -	movb	%al, %ah
   8.111 -lzs4_store_word:
   8.112 -	shrw	$1, %cx
   8.113 -	rep stosw %es:(%di)
   8.114 -	adcw	%cx, %cx
   8.115 -	rep stosb %es:(%di)
   8.116 -lzs4_move_words:
   8.117 -	shrw	$1, %cx
   8.118 -	rep movsw %es:(%si), %es:(%di)
   8.119 -	adcw	%cx, %cx
   8.120 -#endif
   8.121 -	rep movsb %es:(%si), %es:(%di)	// copy string
   8.122 -	popw	%si
   8.123 -
   8.124 -lz4chunk:				// uncompress chunk
   8.125 -	lodsb				// get token
   8.126 -	movb	%al, %bl
   8.127 -	shrb	$4, %al
   8.128 -	call	lz4len			// get literal length
   8.129 -#ifdef USE_MOVSW
   8.130 -	shrw	$1, %cx
   8.131 -	rep movsw %ds:(%si), %es:(%di)
   8.132 -	adcw	%cx, %cx
   8.133 -#endif
   8.134 -	rep movsb %ds:(%si), %es:(%di)	// copy literals
   8.135 -	cmpw	%dx, %si
   8.136 -	jb	lz4string
   8.137 -#endif
   8.138 -done:
   8.139 -	movw	$0x020E, %di
   8.140 -	movsw				// version string
   8.141 -	movw	$0x01F1, %di
   8.142 -	movsb				// setup size
   8.143 -	//popaw
   8.144 -	retf
   8.145 -
   8.146 -#if  defined(COMPRESSION_LZMA)
   8.147 -#define FLAT16		1
   8.148 -#define FLAT16OUT	1
   8.149 -#define NO_LZMA_HEADER	1
   8.150 -unlzma:
   8.151 -#include "unlzma.S"
   8.152 -#elif defined(COMPRESSION_LZSA2)
   8.153 -unlzsa2:
   8.154 -#include "unlzsa2.S"
   8.155 -#elif defined(COMPRESSION_LZSA1)
   8.156 -unlzsa1:
   8.157 -#include "unlzsa1.S"
   8.158 -#endif
   8.159 -
   8.160 -packed:
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux/stuff/unzx0.S	Tue Aug 15 09:20:55 2023 +0000
     9.3 @@ -0,0 +1,127 @@
     9.4 +#ifndef FLAT32
     9.5 +//   input   ds:si=inStream, es:di=outStream
     9.6 +//   output  outStream[], ds:si, es:di
     9.7 +	.code16
     9.8 +#define BX	%bx
     9.9 +#define SI	%si
    9.10 +#define DI	%di
    9.11 +#else
    9.12 +//   input   esi=inStream, edi=outStream
    9.13 +//   output  outStream[], ds:esi, es:edi
    9.14 +	.code32
    9.15 +#define BX	%ebx
    9.16 +#define SI	%esi
    9.17 +#define DI	%edi
    9.18 +#endif
    9.19 +
    9.20 +//  unzx0_8088.S - ZX0 decompressor for 8088 - 73 bytes - NASM
    9.21 +//
    9.22 +//  inputs:
    9.23 +//  * ds:si: start of compressed data
    9.24 +//  * es:di: start of decompression buffer
    9.25 +//
    9.26 +//  Copyright (C) 2021 Emmanuel Marty
    9.27 +//  ZX0 compression (c) 2021 Einar Saukas, https://github.com/einar-saukas/ZX0
    9.28 +//
    9.29 +//  This software is provided 'as-is', without any express or implied
    9.30 +//  warranty.  In no event will the authors be held liable for any damages
    9.31 +//  arising from the use of this software.
    9.32 +//
    9.33 +//  Permission is granted to anyone to use this software for any purpose,
    9.34 +//  including commercial applications, and to alter it and redistribute it
    9.35 +//  freely, subject to the following restrictions:
    9.36 +//
    9.37 +//  1. The origin of this software must not be misrepresented; you must not
    9.38 +//     claim that you wrote the original software. If you use this software
    9.39 +//     in a product, an acknowledgment in the product documentation would be
    9.40 +//     appreciated but is not required.
    9.41 +//  2. Altered source versions must be plainly marked as such, and must not be
    9.42 +//     misrepresented as being the original software.
    9.43 +//  3. This notice may not be removed or altered from any source distribution.
    9.44 +
    9.45 +zx0_decompress:
    9.46 +//        cld                     // make string operations go forward
    9.47 +        movb    $0x80, %al      // initialize empty bit queue
    9.48 +                                // plus bit to roll into carry
    9.49 +        stc
    9.50 +        sbb     BX, BX          // initialize rep-offset to 1 (BX = -1, offsets are kept negative)
    9.51 +
    9.52 +.literals:
    9.53 +#if !defined(FLAT16) && !defined(FLAT32)
    9.54 +        movw    $32768, %dx
    9.55 +        cmpw    %dx, %si        // assume 32767 literals max
    9.56 +        jb      .si_ok
    9.57 +        subw    %dx, %si        // input pointer in the upper half: pull it back by 32768
    9.58 +        movw    %ds, %dx
    9.59 +        addb    $8, %dh         // and move ds up by 0x800 paragraphs (32768 bytes)
    9.60 +        movw    %dx, %ds
    9.61 +.si_ok:
    9.62 +#endif
    9.63 +        call    .get_elias      // read number of literals to copy
    9.64 +        rep     movsb           // copy literal bytes
    9.65 +
    9.66 +        addb     %al, %al       // shift bit queue, and high bit into carry
    9.67 +        jc      .get_offset     // if 1: read offset, if 0: rep-match
    9.68 +
    9.69 +        call    .get_elias      // read rep-match length (starts at 1)
    9.70 +
    9.71 +#if !defined(FLAT16OUT) && !defined(FLAT32)
    9.72 +        jmp     .copy_match
    9.73 +.fix_di:
    9.74 +        subw    $256, %di       // lower di by 256 bytes
    9.75 +        movw    %es, %dx
    9.76 +        addw    $16, %dx        // and raise es by 16 paragraphs (256 bytes): same address
    9.77 +        movw    %dx, %es
    9.78 +.copy_match:
    9.79 +        cmpw    $-32640, %di    // assume 32639 max window
    9.80 +        ja      .fix_di         // repeat until di is no longer above the limit
    9.81 +#else
    9.82 +.copy_match:
    9.83 +#endif
    9.84 +        push    SI              // save si (current pointer to compressed data)
    9.85 +        lea     (BX,DI), SI     // point to destination in es:di + offset in bx
    9.86 +#ifdef ONLY8086
    9.87 +.copy_loop:
    9.88 +	movsb %es:(SI), %es:(DI) // copy matched bytes using NMOS 8088/8086 workaround.
    9.89 +	loop	.copy_loop
    9.90 +#else
    9.91 +        rep movsb %es:(SI), %es:(DI)  // copy matched bytes
    9.92 +#endif
    9.93 +        pop     SI              // restore si
    9.94 +
    9.95 +        addb    %al, %al        // read 'literal or match' bit
    9.96 +        jnc     .literals       // if 0: go copy literals
    9.97 +
    9.98 +.get_offset:                    // read a new match offset into bx and the match length into cx, then copy
    9.99 +        movb    $0xfe, %cl      // initialize value to FEh
   9.100 +        call    .elias_loop     // read high byte of match offset, set carry
   9.101 +        incb    %cl             // obtain negative offset high byte
   9.102 +        je      .done           // exit if EOD marker
   9.103 +        
   9.104 +        movb    %cl, %bh        // transfer negative high byte into bh
   9.105 +        movw    $1, %cx         // initialize match length value to 1
   9.106 +        movb    (SI), %bl       // read low byte of offset + 1 bit of len (SI macro: full-width pointer in FLAT32 builds)
   9.107 +        inc     SI              // inc instruction keep carry set
   9.108 +                                // set high bit that is shifted into bit 15
   9.109 +        rcrw    $1, %bx         // shift len bit into carry/offset in place
   9.110 +        call    .elias_bt       // if len bit is set, no need for more
   9.111 +                                // else read rest of elias-encoded match length
   9.112 +        incw    %cx             // fix match length
   9.113 +        jmp     .copy_match     // go copy match
   9.114 +
   9.115 +.get_elias:                     // read an interlaced Elias-gamma value into cx; al is the bit queue
   9.116 +        movw    $1, %cx         // initialize value to 1
   9.117 +.elias_loop:                    // second entry point: the caller preloads cl
   9.118 +        addb    %al, %al        // shift bit queue, and high bit into carry
   9.119 +        jnz     .got_bit        // queue not empty, bits remain
   9.120 +        lodsb                   // read 8 new bits
   9.121 +        adcb    %al, %al        // shift bit queue, and high bit into carry
   9.122 +                                // the carry shifted in becomes the new end-of-queue marker
   9.123 +.got_bit:
   9.124 +.elias_bt:                      // third entry point: the control bit is already in carry
   9.124 +        jc      .got_elias      // done if control bit is 1
   9.125 +        addb    %al, %al        // read data bit
   9.126 +        adcw    %cx, %cx        // shift into cx
   9.127 +        jmp     .elias_loop     // keep reading
   9.128 +.got_elias:
   9.129 +.done:                          // shared ret, also the EOD exit of .get_offset
   9.130 +        ret