gvmat32.asm 13.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
;
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
; File written by Gilles Vollant, by modifying the longest_match
;  from Jean-loup Gailly in deflate.c
; It needs wmask == 0x7fff
;     (assembly code is faster with a fixed wmask)
;
; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
;   I compile with : "ml /coff /Zi /c gvmat32.asm"
;

;uInt longest_match_7fff(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */

        ; Stack frame of longest_match_7fff, addressed relative to esp AFTER the
        ; prologue (4 register pushes + "sub esp,NbStackAdd").
        ; NbStack is the distance from that esp up to the cur_match argument:
        ;   52 bytes of locals (NbStackAdd) + 16 bytes of saved registers
        ;   + 4 bytes return address + 4 bytes for the s argument = 76.
        ; Each name below is a text macro for one dword (or word) slot.
        NbStack         equ     76
        ; the two arguments pushed by the caller
        cur_match       equ     dword ptr[esp+NbStack-0]
        str_s           equ     dword ptr[esp+NbStack-4]
; 5 dwords on top (ret, then ebp, edi, esi, ebx in the order the prologue pushes them)
        adrret          equ     dword ptr[esp+NbStack-8]
        pushebp         equ     dword ptr[esp+NbStack-12]
        pushedi         equ     dword ptr[esp+NbStack-16]
        pushesi         equ     dword ptr[esp+NbStack-20]
        pushebx         equ     dword ptr[esp+NbStack-24]

        ; local variables (the area reserved by "sub esp,NbStackAdd")
        chain_length    equ     dword ptr [esp+NbStack-28]
        limit           equ     dword ptr [esp+NbStack-32]
        best_len        equ     dword ptr [esp+NbStack-36]
        window          equ     dword ptr [esp+NbStack-40]
        prev            equ     dword ptr [esp+NbStack-44]
        ; scan_start is accessed as a 16-bit value (first two bytes of scan)
        scan_start      equ      word ptr [esp+NbStack-48]
        ; wmask and match_start_ptr have a slot but are not referenced by the
        ; code visible in this file
        wmask           equ     dword ptr [esp+NbStack-52]
        match_start_ptr equ     dword ptr [esp+NbStack-56]
        nice_match      equ     dword ptr [esp+NbStack-60]
        scan            equ     dword ptr [esp+NbStack-64]

        windowlen       equ     dword ptr [esp+NbStack-68]
        match_start     equ     dword ptr [esp+NbStack-72]
        strend          equ     dword ptr [esp+NbStack-76]
        ; bytes of locals to reserve: everything below the 6 dwords
        ; (2 arguments, return address... saved registers) counted above
        NbStackAdd      equ     (NbStack-24)

    ; Assembler setup: 80386 instruction set (the "p" form also accepts
    ; privileged instructions), module name gvmatch, 32-bit flat memory model.
    .386p

    name    gvmatch
    .MODEL  FLAT



;  Byte offsets of the deflate_state fields used by this file, listed in
;  ascending order.
;  Every offset is shifted by dep_zlib_adjust (4 bytes) because
;  pending_buf_size was added to the deflate_state structure (in zlib) after
;  the asm code was first written.
;  (If you compile with zlib 1.0.4 or older, remove the shift by setting
;  dep_zlib_adjust to 0.)
;  Note : these values are good with a structure packed on an 8-byte boundary
    dep_zlib_adjust     equ     4

    dep_w_size          equ     20h+dep_zlib_adjust
    dep_w_mask          equ     28h+dep_zlib_adjust
    dep_window          equ     2ch+dep_zlib_adjust
    dep_prev            equ     34h+dep_zlib_adjust
    dep_strstart        equ     60h+dep_zlib_adjust
    dep_match_start     equ     64h+dep_zlib_adjust
    dep_lookahead       equ     68h+dep_zlib_adjust
    dep_prev_length     equ     6ch+dep_zlib_adjust
    dep_chain_length    equ     70h+dep_zlib_adjust
    dep_good_match      equ     80h+dep_zlib_adjust
    dep_nice_match      equ     84h+dep_zlib_adjust


; Code segment; it is closed by the "_TEXT ends" at the end of the file.
_TEXT                   segment

; Exported entry point. Define NOUNDERLINE to export the name without the
; leading underscore. The match_init export is commented out in both variants.
IFDEF NOUNDERLINE
                        public  longest_match_7fff
;                        public  match_init
ELSE
                        public  _longest_match_7fff
;                        public  _match_init
ENDIF

    ; String-matching limits. MIN_LOOKAHEAD is subtracted from s->w_size in
    ; longest_match_7fff to obtain MAX_DIST(s).
    MAX_MATCH           equ     258
    MIN_MATCH           equ     3
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)



; match_init is disabled: both branches below contain only commented-out code,
; so this conditional assembles to nothing.
IFDEF NOUNDERLINE
;match_init      proc near
;                ret
;match_init      endp
ELSE
;_match_init     proc near
;                ret
;_match_init     endp
ENDIF


;-----------------------------------------------------------------------
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
;
; Follows the hash chain s->prev starting at cur_match and searches the
; window for the longest string equal to the one at window + s->strstart.
;
; In:    [esp+4] = s, [esp+8] = cur_match (stack arguments; the routine
;        ends with a plain "ret", so the caller removes them)
; Out:   eax = min(best_len, s->lookahead)
;        s->match_start is written on exit
; Regs:  ebp, edi, esi, ebx are saved and restored; eax, ecx, edx are
;        used as scratch.
; Note:  the chain index is masked with the constant 7fffh instead of
;        s->w_mask, so this routine is only valid when w_mask == 7fffh.
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
longest_match_7fff   proc near
ELSE
_longest_match_7fff  proc near
ENDIF

        mov     edx,[esp+4]             ; edx = s (read before esp moves)



        push    ebp
        push    edi
        push    esi
        push    ebx

        sub     esp,NbStackAdd          ; reserve the local variable area

; initialize or check the variables used in match.asm.
        mov     ebp,edx                 ; ebp = s for the whole setup phase

; chain_length = s->max_chain_length
; if (prev_length>=good_match) chain_length >>= 2
        mov     edx,[ebp+dep_chain_length]
        mov     ebx,[ebp+dep_prev_length]
        cmp     [ebp+dep_good_match],ebx
        ja      noshr                   ; good_match > prev_length : keep full length
        shr     edx,2
noshr:
; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
        inc     edx
        mov     edi,[ebp+dep_nice_match]
        mov     chain_length,edx
        mov     eax,[ebp+dep_lookahead]
        cmp     eax,edi
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
        jae     nolookaheadnicematch
        mov     edi,eax
nolookaheadnicematch:
; best_len = s->prev_length
        mov     best_len,ebx

; window = s->window
        mov     esi,[ebp+dep_window]
        mov     ecx,[ebp+dep_strstart]  ; ecx = s->strstart
        mov     window,esi

        mov     nice_match,edi
; scan = window + strstart
        add     esi,ecx
        mov     scan,esi
; dx = *(ushf*)scan  (the first two bytes of the string to match)
        mov     dx,word ptr [esi]
; bx = *(ushf*)(scan+best_len-1)
        mov     bx,word ptr [esi+ebx-1]
        add     esi,MAX_MATCH-1
; scan_start = *scan
        mov     scan_start,dx
; strend = scan + MAX_MATCH-1  (stored here; not read again in this routine)
        mov     strend,esi
; bx = scan_end = *(ushf*)(scan+best_len-1)

;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;        s->strstart - (IPos)MAX_DIST(s) : NIL;

        mov     esi,[ebp+dep_w_size]
        sub     esi,MIN_LOOKAHEAD
; here esi = MAX_DIST(s)
        sub     ecx,esi
        ja      nodist                  ; strstart > MAX_DIST : keep the difference
        xor     ecx,ecx                 ; otherwise limit = 0
nodist:
        mov     limit,ecx

; prev = s->prev
        mov     edx,[ebp+dep_prev]
        mov     prev,edx

; match_start = s->match_start, then load the loop registers.
; Writing bp destroys the low half of ebp (the s pointer); s is reloaded
; from str_s at exitloop.
        mov     edx,dword ptr [ebp+dep_match_start]
        mov     bp,scan_start
        mov     eax,cur_match
        mov     match_start,edx

        mov     edx,window
        mov     edi,edx
        add     edi,best_len
        mov     esi,prev
        dec     edi
; windowlen = window + best_len -1
        mov     windowlen,edi

        jmp     beginloop2
        align   4

; here, in the loop
;       eax = ax = cur_match
;       ecx = limit
;       edx = window
;        bx = scan_end
;        bp = scan_start
;       edi = windowlen (window + best_len -1)
;       esi = prev


;// here; chain_length <=16
; Simple (not unrolled) loop. First give back the 16 that beginloop2 or
; rcontloop15 subtracted in advance; if nothing is left, stop.
normalbeg0add16:
        add     chain_length,16
        jz      exitloop
normalbeg0:
; compare the two bytes at match+best_len-1 with scan_end
        cmp     word ptr[edi+eax],bx
        je      normalbeg2noroll
rcontlabnoroll:
; cur_match = prev[cur_match & wmask]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; if cur_match <= limit, go to exitloop
        cmp     ecx,eax
        jnb     exitloop
; if --chain_length != 0, try the next candidate; otherwise exit
        dec     chain_length
        jnz     normalbeg0
        jmp     exitloop

normalbeg2noroll:
; if (scan_start==*(cur_match+window)) goto normalbeg2
        cmp     bp,word ptr[edx+eax]
        jne     rcontlabnoroll
        jmp     normalbeg2

; Re-entry point after a candidate has been fully examined: reload edi
; (it was used as a scratch pointer) and step to the next chain entry.
contloop3:
        mov     edi,windowlen

; cur_match = prev[cur_match & wmask]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; if cur_match <= limit, go to exitloop
        cmp     ecx,eax
jnbexitloopshort1:
        jnb     exitloop
; no "dec chain_length" here: the decrement is the "+1" of the
; "sub chain_length,16+1" just below


; begin the main loop
; chain_length is charged 16+1 in advance: the +1 is the --chain_length
; (see the inc during setup), the 16 covers one pass through the unrolled
; code starting at do16.
beginloop2:
        sub     chain_length,16+1
; if chain_length <=16, don't use the unrolled loop
        jna     normalbeg0add16

do16:
        cmp     word ptr[edi+eax],bx
        je      normalbeg2dc0

; maccn lab : one unrolled step. Advance cur_match along the chain, exit if
; cur_match <= limit, then jump to lab when the two bytes at
; match+best_len-1 equal scan_end.
maccn   MACRO   lab
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jnb     exitloop
        cmp     word ptr[edi+eax],bx
        je      lab
        ENDM

rcontloop0:
        maccn   normalbeg2dc1

rcontloop1:
        maccn   normalbeg2dc2

rcontloop2:
        maccn   normalbeg2dc3

rcontloop3:
        maccn   normalbeg2dc4

rcontloop4:
        maccn   normalbeg2dc5

rcontloop5:
        maccn   normalbeg2dc6

rcontloop6:
        maccn   normalbeg2dc7

rcontloop7:
        maccn   normalbeg2dc8

rcontloop8:
        maccn   normalbeg2dc9

rcontloop9:
        maccn   normalbeg2dc10

rcontloop10:
        maccn   short normalbeg2dc11

rcontloop11:
        maccn   short normalbeg2dc12

rcontloop12:
        maccn   short normalbeg2dc13

rcontloop13:
        maccn   short normalbeg2dc14

rcontloop14:
        maccn   short normalbeg2dc15

; last step of the batch: advance along the chain, then either start
; another batch of 16 or fall back to the simple loop
rcontloop15:
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jnb     exitloop

        sub     chain_length,16
        ja      do16
        jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; normbeg rcontlab,valsub : target of the "je" in unrolled step number
; valsub. It checks the first two bytes of the candidate; on mismatch it
; resumes the unrolled code at rcontlab. On match it adds 16-valsub to
; chain_length (the part of the pre-charged 16 that this batch has not
; used, valsub being the number of chain steps already taken) and goes
; to iseq for the full comparison.
normbeg MACRO   rcontlab,valsub
; if we are here, we know that *(match+best_len-1) == scan_end
        cmp     bp,word ptr[edx+eax]
; if (match != scan_start) goto rcontlab
        jne     rcontlab
; calculate the good chain_length, and we'll compare scan and match string
        add     chain_length,16-valsub
        jmp     iseq
        ENDM


normalbeg2dc11:
        normbeg rcontloop11,11

normalbeg2dc12:
        normbeg short rcontloop12,12

normalbeg2dc13:
        normbeg short rcontloop13,13

normalbeg2dc14:
        normbeg short rcontloop14,14

normalbeg2dc15:
        normbeg short rcontloop15,15

normalbeg2dc10:
        normbeg rcontloop10,10

normalbeg2dc9:
        normbeg rcontloop9,9

normalbeg2dc8:
        normbeg rcontloop8,8

normalbeg2dc7:
        normbeg rcontloop7,7

normalbeg2dc6:
        normbeg rcontloop6,6

normalbeg2dc5:
        normbeg rcontloop5,5

normalbeg2dc4:
        normbeg rcontloop4,4

normalbeg2dc3:
        normbeg rcontloop3,3

normalbeg2dc2:
        normbeg rcontloop2,2

normalbeg2dc1:
        normbeg rcontloop1,1

normalbeg2dc0:
        normbeg rcontloop0,0


; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end

normalbeg2:
        mov     edi,window

        cmp     bp,word ptr[edi+eax]
        jne     contloop3                   ; if *(ushf*)match != scan_start, continue

iseq:
; if we are here, we know that *(match+best_len-1) == scan_end
; and (match == scan_start)
; From here edx, esi, ecx and edi are used as scratch; they are reloaded
; before jumping back to contloop3.

        mov     edi,edx
        mov     esi,scan                    ; esi = scan
        add     edi,eax                     ; edi = window + cur_match = match

; Bytes 0-1 are known equal (scan_start). Byte 2 is not compared by this
; code. NOTE(review): presumably it is guaranteed equal by the hash, as in
; deflate.c -- confirm.
        mov     edx,[esi+3]                 ; compare manually dword at match+3
        xor     edx,[edi+3]                 ; and scan +3

        jz      begincompare                ; if equal, go to long compare

; we will determine the first differing byte and calculate len (in esi):
; edx holds scan[3..6] xor match[3..6], lowest byte = offset 3
        or      dl,dl
        je      eq1rr
        mov     esi,3                       ; byte 3 differs : len = 3
        jmp     trfinval
eq1rr:
        or      dx,dx
        je      eq1

        mov     esi,4                       ; byte 4 differs : len = 4
        jmp     trfinval
eq1:
        and     edx,0ffffffh
        jz      eq11
        mov     esi,5                       ; byte 5 differs : len = 5
        jmp     trfinval
eq11:
        mov     esi,6                       ; only byte 6 differs : len = 6
        jmp     trfinval

begincompare:
        ; here we know that scan and match begin the same
        add     edi,6
        add     esi,6
        mov     ecx,(MAX_MATCH-(2+4))/4     ; scan for at most MAX_MATCH bytes
        repe    cmpsd                       ; loop until mismatch

        je      trfin                       ; all dwords equal : esi = scan+MAX_MATCH
; we determine the first differing byte: step esi back to the dword that
; differed and advance it by one for every equal low-order byte
        sub     esi,4
        mov     edx,[edi-4]
        xor     edx,[esi]

        or      dl,dl
        jnz     trfin
        inc     esi

        or      dx,dx
        jnz     trfin
        inc     esi

        and     edx,0ffffffh
        jnz     trfin
        inc     esi

trfin:
        sub     esi,scan          ; esi = len
trfinval:
; here we have finished the compare, and esi contains the length of the equal string
        cmp     esi,best_len        ; if len > best_len, go newbestlen
        ja      short newbestlen
; now we restore edx, ecx and esi, for the big loop
        mov     esi,prev
        mov     ecx,limit
        mov     edx,window
        jmp     contloop3

newbestlen:
        mov     best_len,esi        ; len become best_len

        mov     match_start,eax     ; save new position as match_start
        cmp     esi,nice_match      ; if best_len >= nice_match, exit
        jae     exitloop
        mov     ecx,scan
        mov     edx,window          ; restore edx=window
        add     ecx,esi
        add     esi,edx

        dec     esi
        mov     windowlen,esi       ; windowlen = window + best_len-1
        mov     bx,[ecx-1]          ; bx = *(scan+best_len-1) = scan_end

; now we restore ecx and esi, for the big loop :
        mov     esi,prev
        mov     ecx,limit
        jmp     contloop3

exitloop:
; exit : s->match_start=match_start
        mov     ebx,match_start
        mov     ebp,str_s           ; reload s (bp was overwritten with scan_start)
        mov     ecx,best_len
        mov     dword ptr [ebp+dep_match_start],ebx        
        mov     eax,dword ptr [ebp+dep_lookahead]
        cmp     ecx,eax
        ja      minexlo             ; best_len > lookahead : return lookahead
        mov     eax,ecx             ; otherwise return best_len
minexlo:
; return min(best_len,s->lookahead)
        
; restore stack and register ebx,esi,edi,ebp
        add     esp,NbStackAdd

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
InfoAuthor:
; please don't remove this string !
; You are free to use gvmat32 in any free or commercial apps if you don't remove the string in the binary!
        db     0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah



IFDEF NOUNDERLINE
longest_match_7fff   endp
ELSE
_longest_match_7fff  endp
ENDIF


;-----------------------------------------------------------------------
; cpudetect32 -- identify the processor generation.
;
; In:    nothing
; Out:   eax = 0300h  if the AC flag (bit 18 of EFLAGS) cannot be toggled
;                     (80386)
;        eax = 0400h  if AC can be toggled but the ID flag (bit 21) cannot
;                     (486 without CPUID)
;        otherwise    the value CPUID function 1 returns in eax
; Regs:  ebx is saved and restored, because CPUID overwrites it and the
;        32-bit C calling convention requires it to be preserved.
;        ecx and edx are used as scratch.
;        EFLAGS is put back to its original value on the paths that
;        changed it.
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF

        push    ebx             ; CPUID below writes ebx: keep the caller's value

        pushfd                  ; push original EFLAGS
        pop     eax             ; get original EFLAGS
        mov     ecx, eax        ; save original EFLAGS
        xor     eax, 40000h     ; flip AC bit in EFLAGS
        push    eax             ; save new EFLAGS value on stack
        popfd                   ; replace current EFLAGS value
        pushfd                  ; get new EFLAGS
        pop     eax             ; store new EFLAGS in EAX
        xor     eax, ecx        ; cannot toggle AC bit, processor=80386
        jz      end_cpu_is_386  ; jump if 80386 processor
        push    ecx
        popfd                   ; restore AC bit in EFLAGS first

        pushfd                  ; keep a copy of the original EFLAGS on the stack
        pushfd
        pop     ecx             ; ecx = original EFLAGS

        mov     eax, ecx        ; get original EFLAGS
        xor     eax, 200000h    ; flip ID bit in EFLAGS
        push    eax             ; save new EFLAGS value on stack
        popfd                   ; replace current EFLAGS value
        pushfd                  ; get new EFLAGS
        pop     eax             ; store new EFLAGS in EAX
        popfd                   ; restore original EFLAGS
        xor     eax, ecx        ; cannot toggle ID bit,
        je      is_old_486      ; processor=old

        mov     eax,1           ; CPUID function 1
        db      0fh,0a2h        ; CPUID (emitted as bytes; writes eax, ebx, ecx, edx)

exitcpudetect:
        pop     ebx             ; restore the caller's ebx on every exit path
        ret

end_cpu_is_386:
        mov     eax,0300h
        jmp     exitcpudetect

is_old_486:
        mov     eax,0400h
        jmp     exitcpudetect

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF

_TEXT   ends
end