Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 21-installer-nodd
  • 2210-pre1
  • 2210-pre2
  • 2210-rc1
  • 2210-rc2
  • 2210-rc3
  • 2211-pre1
  • 2211-pre2
  • 2211-rc1
  • 2212-pre1
  • 2212-pre2
  • 2212-pre3
  • 2212-rc1
  • 2213
  • 2_2_12
  • 64-gl-log
  • COM_ImmedExecute-lua
  • DJGPP
  • SRB2-Stuff
  • SRB2_Discord
  • Sugoi-2
  • accel-momentum
  • any-resolution
  • appveyor
  • blend-locking
  • blentran
  • blua-unary-not-fix
  • boost-tickrate
  • bustablemobjzfix
  • bustablesoundz
  • cleanup-opengl
  • cleanupmusic
  • cmake-valgrind
  • crawlacommander-sprites
  • cutscene-cleanup
  • dd-music-bypass
  • dd-music-fix
  • delfile2
  • deprecate-lua-dedicated-server
  • dpl-2
  • dropshadows-spawning
  • dynabsp
  • emblem-drawing
  • exchndl-xp-fix
  • few-kart-lua-changes
  • ffloorclip
  • fix-cvar-conflicts
  • fix-opengl-shear-roll
  • flipfuncpointers
  • fof-lightlist-fixes
  • font-FUCK
  • font_drawer
  • frictionrefactor
  • fruits-clipper
  • fuck-macros-1
  • gamepad-luakeydown
  • gamepad-morefixes
  • gamepad_experiments
  • gametype-refactor
  • ghost-networking
  • gif-splitting
  • gitlab-ci
  • grr-lj
  • hitboxviewer
  • hwr-texture-cache-refactor
  • hwrender2
  • improve-439
  • improve-download-refuse-message
  • increase-packet-tics
  • input-display
  • input-display-translucency
  • io
  • joystick-juggling-maz
  • keycodes-only
  • kill-hud-feetoffset
  • ksf-wadfiles
  • ld413-mp-fix
  • levelstruct
  • libpng-version-support
  • linedef-actions
  • lj-test
  • lol-states
  • loopedsounds
  • lower-unpegged-fix
  • lua-change-gametype
  • lua-colorlib
  • lua-command-netids
  • lua-extracolormap
  • lua-local
  • lua-minmax-plus-bruh-moments
  • makefile-auto-mingw-gcc
  • makefile-tinkering
  • map-components-signedness-fixes
  • maretimers
  • master
  • menu-edits
  • mobj-dispoffset
  • more-cleanup
  • multithread
  • musicdef-lua
  • SRB2_release_2.1
  • SRB2_release_2.1.1
  • SRB2_release_2.1.10
  • SRB2_release_2.1.11
  • SRB2_release_2.1.12
  • SRB2_release_2.1.14
  • SRB2_release_2.1.15
  • SRB2_release_2.1.16
  • SRB2_release_2.1.16a
  • SRB2_release_2.1.17
  • SRB2_release_2.1.18
  • SRB2_release_2.1.19
  • SRB2_release_2.1.2
  • SRB2_release_2.1.20
  • SRB2_release_2.1.21
  • SRB2_release_2.1.22
  • SRB2_release_2.1.23
  • SRB2_release_2.1.24
  • SRB2_release_2.1.25
  • SRB2_release_2.1.3
  • SRB2_release_2.1.4
  • SRB2_release_2.1.5
  • SRB2_release_2.1.6
  • SRB2_release_2.1.7
  • SRB2_release_2.1.8
  • SRB2_release_2.1.9
  • SRB2_release_2.2.0
  • SRB2_release_2.2.1
  • SRB2_release_2.2.10
  • SRB2_release_2.2.11
  • SRB2_release_2.2.12
  • SRB2_release_2.2.13
  • SRB2_release_2.2.2
  • SRB2_release_2.2.3
  • SRB2_release_2.2.4
  • SRB2_release_2.2.5
  • SRB2_release_2.2.6
  • SRB2_release_2.2.7
  • SRB2_release_2.2.8
  • SRB2_release_2.2.9
  • td-release-v1.0.0
141 results

Target

Select target project
  • STJr/SRB2
  • Sryder/SRB2
  • wolfy852/SRB2
  • Alpha2244/SRB2
  • Inuyasha/SRB2
  • yoshibot/SRB2
  • TehRealSalt/SRB2
  • PrisimaTF/SRB2
  • Hatninja/SRB2
  • SteelT/SRB2
  • james/SRB2
  • ShaderWraith/SRB2
  • SinnamonLat/SRB2
  • mazmazz_/SRB2
  • filpAM/SRB2
  • chaoloveicemdboy/SRB2
  • Whooa21/SRB2
  • Machturne/SRB2
  • Golden/SRB2
  • Tatsuru/SRB2
  • Snu/SRB2
  • Zwip-Zwap_Zapony/SRB2
  • fickleheart/SRB2
  • alphaRexJames/SRB2
  • JJK/SRB2
  • diskpoppy/SRB2
  • Hannu_Hanhi/SRB2
  • ZipperQR/SRB2
  • kays/SRB2
  • spherallic/SRB2
  • Zippy_Zolton/SRB2
  • namiishere/SRB2
  • Ors/SRB2
  • SMS_Alfredo/SRB2
  • sonic_edge/SRB2
  • lavla/SRB2
  • ashi/SRB2
  • X.organic/SRB2
  • Fafabis/SRB2
  • Meziu/SRB2
  • v-rob/SRB2
  • tertu/SRB2
  • bitten2up/SRB2
  • flarn2006/SRB2
  • Krabs/SRB2
  • clairebun/SRB2
  • Lactozilla/SRB2
  • thehackstack/SRB2
  • Spice/SRB2
  • win8linux/SRB2
  • JohnFrostFox/SRB2
  • talktoneon726/SRB2
  • Wane/SRB2
  • Lamibe/SRB2
  • spectrumuk2/srb-2
  • nerdyminer18/srb-2
  • 256nil/SRB2
  • ARJr/SRB2
  • Alam/SRB2
  • Zenya/srb-2-marathon-demos
  • Acelite/srb-2-archivedmodifications
  • MIDIMan/SRB2
  • Lach/SRB2
  • Frostiikin/bounce-tweaks
  • Jaden/SRB2
  • Tyron/SRB2
  • Astronight/SRB2
  • Mari0shi06/SRB2
  • aiire/SRB2
  • Galactice/SRB2
  • srb2-ports/srb2-dreamcast
  • sdasdas/SRB2
  • chreas/srb-2-vr
  • StarManiaKG/the-story-of-sinically-rocketing-and-botching-the-2nd
  • LoganAir/SRB2
  • NepDisk/srb-2
  • alufolie91/SRB2
  • Felicia.iso/SRB2
  • twi/SRB2
  • BarrelsOFun/SRB2
  • Speed2411/SRB2
  • Leather_Realms/SRB2
  • Ayemar/SRB2
  • Acelite/SRB2
  • VladDoc/SRB2
  • kaldrum/model-features
  • strawberryfox417/SRB2
  • Lugent/SRB2
  • Rem/SRB2
  • Refrag/SRB2
  • Henry_3230/srb-3230
  • TehPuertoRicanSpartan2/tprs-srb2
  • Leminn/srb-2-marathon-stuff
  • chromaticpipe2/SRB2
  • MiguelGustavo15/SRB2
  • Maru/srb-2-tests
  • SilicDev/SRB2
  • UnmatchedBracket/SRB2
  • HybridDog/SRB2
  • xordspar0/SRB2
  • jsjhbewfhh/SRB2
  • Fancy2209/SRB2
  • Lorsoen/SRB2
  • shindoukin/SRB2
  • GamerOfDays/SRB2
  • Craftyawesome/SRB2
  • tenshi-tensai-tennoji/SRB2
  • Scarfdudebalder/SRB2
  • luigi-budd/srb-2-fix-interplag-lockon
  • mskluesner/SRB2
  • johnpetersa19/SRB2
  • Pheazant/SRB2
  • chromaticpipe2/srb2classic
  • romoney5/SRB2
  • PAS/SRB2Classic
  • BlueStaggo/SRB2
  • Jisk/srb-2-beef-jerky
117 results
Select Git revision
  • 21-installer-nodd
  • 2210-pre1
  • 2210-pre2
  • 2210-rc1
  • 2210-rc2
  • 2210-rc3
  • 2211-pre1
  • 2211-pre2
  • 2211-rc1
  • 2212-pre1
  • 2212-pre2
  • 2212-pre3
  • 2212-rc1
  • 2213
  • 2214-pre1
  • 2214-pre2
  • 2214-pre3
  • 2214-pre4
  • 2_2_12
  • 64-gl-log
  • COM_ImmedExecute-lua
  • DJGPP
  • accel-momentum
  • acs
  • action-args
  • alpha-fixes
  • any-resolution
  • appveyor
  • blend-locking
  • blentran
  • blua-unary-not-fix
  • boost-tickrate
  • bustablesoundz
  • cleanup-opengl
  • cleanupmusic
  • clipmidtex
  • cmake-valgrind
  • crawlacommander-sprites
  • custom-map-names
  • custom-teams
  • cutscene-cleanup
  • dd-music-bypass
  • dd-music-fix
  • delfile2
  • deprecate-lua-dedicated-server
  • dpl-2
  • dropshadows-spawning
  • dynabsp
  • emblem-drawing
  • exchndl-xp-fix
  • extra-textures
  • few-kart-lua-changes
  • ffloorclip
  • fix-167
  • fix-cvar-conflicts
  • fix-opengl-parameter-crash
  • fix-opengl-shear-roll
  • flipfuncpointers
  • fof-lightlist-fixes
  • font-FUCK
  • frictionrefactor
  • fuck-macros-1
  • gamepad-luakeydown
  • gamepad-morefixes
  • gamepad_experiments
  • gametype-refactor
  • gametype-refactor-1
  • gametype-refactor-player-spawns
  • ghost-networking
  • gif-splitting
  • grr-lj
  • hitboxviewer
  • hwr-texture-cache-refactor
  • hwrender2
  • improve-439
  • increase-maxconditionsets
  • increase-packet-tics
  • input-display
  • input-display-translucency
  • io
  • joystick-juggling-maz
  • just-in-case
  • keycodes-only
  • ksf-wadfiles
  • ld413-mp-fix
  • levelstruct
  • libpng-version-support
  • linedef-actions
  • lj-test
  • lol-states
  • loopedsounds
  • lower-unpegged-fix
  • lua-change-gametype
  • lua-command-netids
  • lua-gfx-2
  • lua-gfx-sprites
  • lua-local
  • makefile-auto-mingw-gcc
  • makefile-tinkering
  • map-components-signedness-fixes
  • SRB2_release_2.1
  • SRB2_release_2.1.1
  • SRB2_release_2.1.10
  • SRB2_release_2.1.11
  • SRB2_release_2.1.12
  • SRB2_release_2.1.14
  • SRB2_release_2.1.15
  • SRB2_release_2.1.16
  • SRB2_release_2.1.16a
  • SRB2_release_2.1.17
  • SRB2_release_2.1.18
  • SRB2_release_2.1.19
  • SRB2_release_2.1.2
  • SRB2_release_2.1.20
  • SRB2_release_2.1.21
  • SRB2_release_2.1.22
  • SRB2_release_2.1.23
  • SRB2_release_2.1.24
  • SRB2_release_2.1.25
  • SRB2_release_2.1.3
  • SRB2_release_2.1.4
  • SRB2_release_2.1.5
  • SRB2_release_2.1.6
  • SRB2_release_2.1.7
  • SRB2_release_2.1.8
  • SRB2_release_2.1.9
  • SRB2_release_2.2.0
  • SRB2_release_2.2.1
  • SRB2_release_2.2.10
  • SRB2_release_2.2.11
  • SRB2_release_2.2.12
  • SRB2_release_2.2.13
  • SRB2_release_2.2.15
  • SRB2_release_2.2.2
  • SRB2_release_2.2.3
  • SRB2_release_2.2.4
  • SRB2_release_2.2.5
  • SRB2_release_2.2.6
  • SRB2_release_2.2.7
  • SRB2_release_2.2.8
  • SRB2_release_2.2.9
  • td-release-v1.0.0
142 results
Show changes
......@@ -155,7 +155,7 @@ Worker (
return 0;
}
void
I_thread_handle
I_spawn_thread (
const char * name,
I_thread_fn entry,
......@@ -189,6 +189,7 @@ I_spawn_thread (
}
}
I_unlock_mutex(i_thread_pool_mutex);
return (I_thread_handle)th;
}
int
......@@ -354,3 +355,18 @@ I_wake_all_cond (
if (SDL_CondBroadcast(cond) == -1)
abort();
}
/*
 * Reads the value currently held by `atomic` through SDL_AtomicGet and
 * hands it back to the caller unchanged.
 */
INT32
I_atomic_load (
		I_Atomicptr_t atomic
){
	const INT32 current = SDL_AtomicGet(atomic);

	return current;
}
/*
 * Stores `val` into `atomic` through SDL_AtomicSet and returns whatever
 * SDL_AtomicSet reports (per the SDL documentation, the previous value).
 */
INT32
I_atomic_exchange (
		I_Atomicptr_t atomic,
		INT32         val
){
	const INT32 previous = SDL_AtomicSet(atomic, val);

	return previous;
}
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2021 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode.
;; Draw wall columns.
;; Assemble everything below as 32-bit code.
[BITS 32]
;; Number of fractional bits in the fixed-point values handled here; used
;; as the shift count when converting between integer and fixed-point.
%define FRACBITS 16
;; Texel value that R_Draw2sMultiPatchColumn_8_ASM compares against to
;; decide whether a pixel is skipped instead of drawn.
%define TRANSPARENTPIXEL 255
;; cextern / cglobal wrap [extern] / [global] for C-visible symbols.
;; With LINUX defined the symbol name is used as written; otherwise each
;; name is first redefined to carry a leading underscore, so the rest of
;; the file can keep using the undecorated name.
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
; Layout as declared here: 12 bytes of padding, the 4-byte .width field at
; offset 12, then 44 more bytes (60 bytes total). The drawers read it as
; [vid + viddef_s.width] to step the destination pointer by one row.
; NOTE(review): presumably this has to mirror the C-side layout of `vid`;
; verify against the C declaration whenever that structure changes.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern ylookup
cextern columnofs
cextern dc_source
cextern dc_texturemid
cextern dc_texheight
cextern dc_iscale
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_colormap
cextern dc_transmap
cextern colormaps
cextern vid
cextern topleft
; DELME
cextern R_DrawColumn_8
; polygon edge rasterizer
cextern prastertab
;; Scratch variables shared by the drawers in this file. Being plain
;; globals, they are overwritten by every call that uses them.
[SECTION .data]
;;.align 4
;; loopcount: not referenced by any routine visible in this file.
loopcount dd 0
;; pixelcount: dc_yh - dc_yl + 1, stored by the translucent and shade
;; column drawers and re-read to split the work into 1/2/4-pixel pieces.
pixelcount dd 0
;; tystep: fracstep << 16, parked here by the translucent and shade quad
;; loops because ebp is reused as a second destination pointer.
tystep dd 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;; Takes no stack arguments: all inputs come from the dc_* globals,
;; ylookup, columnofs and vid. Draws rows dc_yl..dc_yh (inclusive) of
;; column dc_x, one colormapped texel per row, and returns immediately
;; when dc_yh - dc_yl + 1 <= 0.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx and edx are
;; overwritten.
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;;       (on the non-power-of-two path: dc_texheight << FRACBITS instead)
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_ASM
; align 16
R_DrawColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; The one-operand imul leaves the signed 64-bit product in edx:eax; shrd
;; then shifts it right by FRACBITS and keeps the low 32 bits in eax.
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
;; Only bit 0 of the byte at dc_hires is tested.
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
;; (height - 1) & height is zero exactly when no second bit is set.
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
;; The loop below is unrolled twice; an odd count enters at the second
;; copy so that the remaining count is even.
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
;; An odd entry drew only one pixel before this subtraction; the count
;; then ends at -1 instead of 0, and jg falls through in both cases.
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
;;
;; Texture height is not a power of two: keep frac inside
;; [0, dc_texheight << FRACBITS) by repeated add/subtract.
;;
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
;; Only the low byte of ebx is replaced by the texel, so ebx equals
;; colormap + texel only if the low byte of dc_colormap is zero.
;; NOTE(review): assumes dc_colormap is 256-byte aligned -- confirm.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;; Takes no stack arguments: all inputs come from the dc_* globals,
;; ylookup, columnofs and vid. For each row dc_yl..dc_yh (inclusive) of
;; column dc_x the texel is fetched; a texel equal to TRANSPARENTPIXEL
;; leaves the destination byte untouched, any other texel is written
;; after the colormap lookup. The destination pointer and frac advance
;; in both cases.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx and edx are
;; overwritten.
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;;       (on the non-power-of-two path: dc_texheight << FRACBITS instead)
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_ASM
; align 16
R_Draw2sMultiPatchColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; The one-operand imul leaves the signed 64-bit product in edx:eax; shrd
;; then shifts it right by FRACBITS and keeps the low 32 bits in eax.
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
;; Only bit 0 of the byte at dc_hires is tested.
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
;; (height - 1) & height is zero exactly when no second bit is set.
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
;; The loop below is unrolled twice; an odd count enters at the second
;; copy so that the remaining count is even.
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoeven ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoeven:
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoodd ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoodd:
;; dest += vid.width
add edi,[vid + viddef_s.width]
;; An odd entry drew only one pixel before this subtraction; the count
;; then ends at -1 instead of 0, and jg falls through in both cases.
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
;;
;; Texture height is not a power of two: keep frac inside
;; [0, dc_texheight << FRACBITS) by repeated add/subtract.
;;
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
;; Only the low byte of ebx is replaced by the texel, so ebx equals
;; colormap + texel only if the low byte of dc_colormap is zero.
;; NOTE(review): assumes dc_colormap is 256-byte aligned -- confirm.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
;; bl still holds the raw texel here, so it can be compared directly.
cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextnonpowtwo ;; If so, advance.
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextnonpowtwo:
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;; R_DrawTranslucentColumn_8_ASM
;;
;; Vertical column texture drawer, with transparency. Replaces Doom2's
;; 'fuzz' effect, which was not so beautiful.
;; Transparency is always impressive in some way, don't know why...
;;
;; Takes no stack arguments: inputs come from dc_yl, dc_yh, dc_x,
;; dc_iscale, centery, dc_texturemid, dc_colormap, dc_transmap, dc_source,
;; ylookup and columnofs. Each output byte is produced by two table
;; reads: first through eax (dc_transmap with ah = texel, al = current
;; destination byte), then through edx (dc_colormap with dl = the first
;; result).
;; NOTE(review): forming those addresses by overwriting ah/al and dl only
;; works if the low 16 bits of dc_transmap and the low 8 bits of
;; dc_colormap are zero -- confirm the alignment guarantees.
;; NOTE(review): the 0x12345678 / 0x23456789 immediates at labels pf..pk
;; and q1..q4 look like placeholders meant to be overwritten at run time
;; with row-stride values; no code in this file that patches these labels
;; is visible -- confirm before relying on this routine.
;; The texture index is masked with 0x7f on every step.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx and edx are
;; overwritten, as is the global scratch [pixelcount] (and [tystep] when
;; the quad loop runs).
;;----------------------------------------------------------------------
cglobal R_DrawTranslucentColumn_8_ASM
R_DrawTranslucentColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
;; mov does not touch the flags, so jle still tests the result of sub.
mov [pixelcount],eax ;; save for final pixel
jle near vtdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
;; Merge: ecx keeps the y fraction in its upper 16 bits (copied from
;; edx) while the push/pop pair preserves cx, i.e. the integer step in cl.
push cx
mov ecx,edx
pop cx
mov edx,[dc_colormap]
mov esi,[dc_source]
;;
;; lets rock :) !
;;
;; The mov instructions between test and je leave the flags alone, so je
;; branches on (pixelcount & 3) == 0. ch receives only the low 8 bits of
;; pixelcount >> 2.
mov eax,[pixelcount]
shr eax,0x2
test byte [pixelcount],0x3
mov ch,al ;; quad count
mov eax,[dc_transmap]
je vt4quadloop
;;
;; do un-even pixel
;;
test byte [pixelcount],0x1
je trf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
;; Adding the step fraction to the upper half of ecx sets carry on
;; overflow; adc folds that carry plus the integer step into the index.
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pf: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
trf2: test byte [pixelcount],0x2
je trf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pg: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
ph: add edi,0x12345678
;;
;; test if there was at least 4 pixels
;;
trf3: test ch,0xff ;; test quad count
je near vtdone
;;
;; Register use in the quad loop, as set up by the code above:
;; ebp : ystep frac. in the upper 16 bits; saved to [tystep] at loop
;;       entry, after which ebp serves as a second destination pointer
;; ecx : upper 16 bits = y frac., ch = quad counter, cl = y step i.
;; edx : dc_colormap, dl replaced for each lookup
;; ebx : y i. lower 7 bits, masked for index
;; eax : dc_transmap, ah = texel, al = destination byte
;; esi : source texture column
;; edi : dest screen
;;
vt4quadloop:
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
pi: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pj: sub edi,0x12345678
mov ebp,edi
pk: sub edi,0x12345678
jmp short inloop
align 4
;; Four pixels per iteration; stores alternate between [edi] and [ebp].
vtquadloop:
add ecx,[tystep]
adc bl,cl
q1: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
inloop:
add ecx,[tystep]
adc bl,cl
q2: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp+0x0],dl
mov al,[edi] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q3: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q4: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne vtquadloop
vtdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;; R_DrawShadeColumn
;;
;; for smoke..etc.. test.
;;
;; Takes no stack arguments: inputs come from dc_yl, dc_yh, dc_x,
;; dc_iscale, centery, dc_texturemid, dc_source, colormaps, ylookup and
;; columnofs. Each output byte is read through eax, which holds the value
;; of [colormaps] with ah replaced by the texel and al by the current
;; destination byte.
;; NOTE(review): that address is only base + texel*256 + dest if the low
;; 16 bits of [colormaps] are zero -- confirm the alignment guarantee.
;; NOTE(review): the 0x12345678 immediates at labels pl..pq and q5..q8
;; look like placeholders meant to be overwritten at run time with
;; row-stride values; no code in this file that patches these labels is
;; visible -- confirm before relying on this routine.
;; The texture index is masked with 0x7f on every step.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx and edx are
;; overwritten, as is the global scratch [pixelcount] (and [tystep] when
;; the quad loop runs).
;;----------------------------------------------------------------------
cglobal R_DrawShadeColumn_8_ASM
R_DrawShadeColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
;; mov does not touch the flags, so jle still tests the result of sub.
mov [pixelcount],eax ;; save for final pixel
jle near shdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,byte +0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
mov esi,[dc_source]
;;
;; lets rock :) !
;;
;; dh temporarily carries the low byte of pixelcount for the 1/2-pixel
;; tests below. The low 16 bits of ebp are zero after the shift above,
;; so adding ebp to edx never disturbs dh.
mov eax,[pixelcount]
mov dh,al
shr eax,2
;; ch receives only the low 8 bits of pixelcount >> 2.
mov ch,al ;; quad count
mov eax,[colormaps]
test dh,3
je sh4quadloop
;;
;; do un-even pixel
;;
test dh,0x1
je shf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
;; Adding the step fraction to the upper half of edx sets carry on
;; overflow; adc folds that carry plus the integer step into the index.
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pl: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
shf2:
test dh,0x2
je shf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pm: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pn: add edi,0x12345678
;;
;; test if there was at least 4 pixels
;;
shf3:
test ch,0xff ;; test quad count
je near shdone
;;
;; ebp : ystep frac. upper 24 bits
;; edx : y frac. upper 24 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen
;;
;; As coded here, both fractions sit in the upper 16 bits (they are
;; shifted left by 16 above); dh holds the 0x7f index mask and dl is
;; scratch. ebp is saved to [tystep] at loop entry and then reused as a
;; second destination pointer.
;;
sh4quadloop:
mov dh,0x7f ;; prep mask
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
po: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pp: sub edi,0x12345678
mov ebp,edi
pq: sub edi,0x12345678
jmp short shinloop
align 4
;; Four pixels per iteration; stores alternate between [edi] and [ebp].
shquadloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q5: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
shinloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q6: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q7: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q8: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne shquadloop
shdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;; ========================================================================
;; Rasterizes the edge segments of a LINEARLY textured polygon.
;; For each raster line, the texture coordinates along the edge are
;; interpolated at the same time as the X coordinates minx/maxx.
;; The 'dir' argument indicates which texture edges are interpolated:
;; 0: segments associated with the TOP and BOTTOM edges (constant TY)
;; 1: segments associated with the LEFT and RIGHT edges (constant TX)
;; ========================================================================
;;
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
;;
;; For dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, since TY is constant.
;;
;; For dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, since TX is constant.
;;
;;
;; Uses: extern struct rastery *_rastertab;
;;
;; Byte offsets of the fields written into one rastertab entry, and the
;; size of one entry (six 4-byte fields).
MINX EQU 0
MAXX EQU 4
TX1 EQU 8
TY1 EQU 12
TX2 EQU 16
TY2 EQU 20
RASTERY_SIZEOF EQU 24
;; rasterize_segment_tex_asm
;;
;; Stack arguments, read relative to ebp after the standard prologue:
;;   [ebp+0x08] x1    [ebp+0x0c] y1    [ebp+0x10] x2    [ebp+0x14] y2
;;   [ebp+0x18] tv1   [ebp+0x1c] tv2   [ebp+0x20] tc    [ebp+0x24] dir
;; Locals: [ebp-4] = DX (x step per line), [ebp-8] = TD (texture step per
;; line), both scaled by << 16.
;;
;; y1 == y2 : nothing is written.
;; y1 >  y2 : fills MINX/TX1/TY1 of y1-y2+1 entries starting at
;;            prastertab[y2], interpolating from (x2, tv2).
;; y1 <  y2 : fills MAXX/TX2/TY2 of y2-y1+1 entries starting at
;;            prastertab[y1], interpolating from (x1, tv1).
;; dir == 0 stores the interpolated value in the TX field and the
;; constant tc in the TY field; any other dir swaps the two.
;;
;; ebx, esi, edi, ebp and es are saved and restored; eax, ecx and edx are
;; overwritten.
cglobal rasterize_segment_tex_asm
rasterize_segment_tex_asm:
push ebp
mov ebp,esp
sub esp,byte +0x8 ;; allocate the local variables
push ebx
push esi
push edi
;; es is saved here and reloaded at .L_finished.
o16 mov ax,es
push eax
;; #define DX [ebp-4]
;; #define TD [ebp-8]
mov eax,[ebp+0xc] ;; y1
mov ebx,[ebp+0x14] ;; y2
cmp ebx,eax
je near .L_finished ;; special (y1==y2) segment horizontal, exit!
jg near .L_rasterize_right
;;rasterize_left: ;; one rasterize a segment LEFT of the polygne
mov ecx,eax
sub ecx,ebx
inc ecx ;; y1-y2+1
mov eax,RASTERY_SIZEOF
mul ebx ;; * y2
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y2]
mov eax,[ebp+0x8] ;; ARG1
sub eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; ((x1-x2)<<PRE) ...
;; cdq sign-extends eax into edx:eax for the signed divide.
cdq
idiv ecx ;; dx = ... / (y1-y2+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x18] ;; ARG5
sub eax,[ebp+0x1c] ;; ARG6
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
mov eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; x = x2<<PRE
mov ebx,[ebp+0x1c] ;; ARG6
shl ebx,0x10 ;; tx = tx2<<PRE d0
;; ty = ty2<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
;; ebp is about to be reused to hold TD inside the loop, so the frame
;; pointer is pushed here and popped after the loop. The mov between cmp
;; and je does not alter the flags.
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rleft_h_loop
;;
;; TY varies, TX is constant
;;
;; NOTE(review): ebx (ty) is advanced BEFORE it is stored in this loop,
;; whereas .L_rleft_h_loop stores tx first and advances afterwards --
;; confirm whether the asymmetry is intended.
.L_rleft_v_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add ebx,ebp
mov [esi+TX1],edx ;; .tx1 = tx
add eax,edi
mov [esi+TY1],ebx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_v_loop
pop ebp
jmp .L_finished
;;
;; TX varies, TY is constant
;;
.L_rleft_h_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add eax,edi
mov [esi+TX1],ebx ;; .tx1 = tx
add ebx,ebp
mov [esi+TY1],edx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_h_loop
pop ebp
jmp .L_finished
;;
;; one rasterize a segment LINE of the polygne
;;
;; Reached when y2 > y1; eax still holds y1 and ebx holds y2.
.L_rasterize_right:
mov ecx,ebx
sub ecx,eax
inc ecx ;; y2-y1+1
mov ebx,RASTERY_SIZEOF
mul ebx ;; * y1
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y1]
mov eax,[ebp+0x10] ;; ARG3
sub eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; ((x2-x1)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y2-y1+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x1c] ;; ARG6
sub eax,[ebp+0x18] ;; ARG5
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
mov eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; x = x1<<PRE
mov ebx,[ebp+0x18] ;; ARG5
shl ebx,0x10 ;; tx = tx1<<PRE d0
;; ty = ty1<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
;; Same ebp juggling as on the left side: the frame pointer is pushed,
;; ebp then carries TD, and it is popped after the loop.
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24], 0 ;; direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rright_h_loop
;;
;; TY varies, TX is constant
;;
;; NOTE(review): as in .L_rleft_v_loop, ty is advanced before the store.
.L_rright_v_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add ebx,ebp
mov [esi+TX2],edx ;; .tx2 = tx
add eax,edi
mov [esi+TY2],ebx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_v_loop
pop ebp
jmp short .L_finished
;;
;; TX varies, TY is constant
;;
.L_rright_h_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add eax,edi
mov [esi+TX2],ebx ;; .tx2 = tx
add ebx,ebp
mov [esi+TY2],edx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_h_loop
pop ebp
;; Common exit: restore es and the callee-saved registers, then drop the
;; two locals by resetting esp from ebp.
.L_finished:
pop eax
o16 mov es,ax
pop edi
pop esi
pop ebx
mov esp,ebp
pop ebp
ret
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2021 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap.s
/// \brief optimised drawing routines for span/column rendering
// structures, must match the C structures!
#include "asm_defs.inc"
// Reminder: only EAX, ECX and EDX may be clobbered freely.
// esi, edi and cs, ds ... gs must be saved
/* Beware of comparisons! */
/* */
/* Intel_compare: */
/* */
/* cmp A,B // A-B , set flags */
/* jg A_greater_than_B */
/* */
/* AT&T_compare: */
/* */
/* cmp A,B // B-A , set flags */
/* jg B_greater_than_A */
/* */
/* (subtracts the source operand FROM the destination operand, */
/* as on Motorola! ) */
// REMINDER: the Intel form
// SECTION:[BASE+INDEX*SCALE+DISP]
// becomes SECTION:DISP(BASE,INDEX,SCALE) in AT&T syntax
//----------------------------------------------------------------------
//
// R_DrawColumn
//
// New optimised version 10-01-1998 by D.Fabrice and P.Boris
// TO DO: optimise it much farther... should take at most 3 cycles/pix
// once it's fixed, add code to patch the offsets so that it
// works in every screen width.
//
//----------------------------------------------------------------------
// Scratch variables shared by the drawing routines in this file.
.data
#ifdef LINUX
.align 2
#else
.align 4
#endif
C(loopcount): .long 0
// pixelcount: dc_yh - dc_yl + 1, stored by R_DrawColumn_8 before drawing.
C(pixelcount): .long 0
C(tystep): .long 0
// vidwidth: written by ASM_PatchRowBytes with its first argument and used
// as the per-row destination step outside the patched inner loops.
C(vidwidth): .long 0 //use this one out of the inner loops
//so you don't need to patch everywhere...
#ifdef USEASM
#if !defined( LINUX)
.text
#endif
// ASM_PatchRowBytes
//
// Takes one stack argument (ARG1). Stores it in vidwidth, then writes
// 1x, 2x, 3x and 4x that value as 32-bit words at offset +2 from each of
// the labels listed below, i.e. it rewrites operands inside instructions
// of the drawing routines at run time.
// NOTE(review): the +2 presumably skips a two-byte opcode/ModRM prefix so
// that the store lands on the 0x12345678-style placeholder operand;
// verify against the encoding of every patched instruction.
// NOTE(review): labels other than p1..p4 are not defined in the part of
// the file visible here -- confirm they exist.
// Only edx is modified; ebp is saved and restored.
.globl C(ASM_PatchRowBytes)
C(ASM_PatchRowBytes):
pushl %ebp
movl %esp, %ebp // set up stack addressing for the argument
movl ARG1, %edx // read first arg
movl %edx, C(vidwidth)
// 1 * vidwidth
movl %edx,p1+2
movl %edx,w1+2 //water
movl %edx,p1b+2 //sky
movl %edx,p5+2
movl %edx,sh5+2 //smokie test
// 2 * vidwidth
addl ARG1,%edx
movl %edx,p2+2
movl %edx,w2+2 //water
movl %edx,p2b+2 //sky
movl %edx,p6+2
movl %edx,p7+2
movl %edx,p8+2
movl %edx,p9+2
movl %edx,sh6+2 //smokie test
movl %edx,sh7+2
movl %edx,sh8+2
movl %edx,sh9+2
// 3 * vidwidth
addl ARG1,%edx
movl %edx,p3+2
movl %edx,w3+2 //water
movl %edx,p3b+2 //sky
// 4 * vidwidth
addl ARG1,%edx
movl %edx,p4+2
movl %edx,w4+2 //water
movl %edx,p4b+2 //sky
popl %ebp
ret
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawColumn_8
//
// Draws one vertical run of pixels in column dc_x, from row dc_yl to row
// dc_yh. Texels are read from dc_source, translated through dc_colormap and
// written starting at ylookup[dc_yl] + columnofs[dc_x], one vidwidth apart.
// The texture index is masked with 0x7f after every step, so the source
// column wraps every 128 texels.
//
// Reads no stack arguments; all inputs are the dc_* globals. ebp, esi, edi
// and ebx are saved and restored; eax, ecx and edx are clobbered.
//
// The stores at p1..p3 and the add at p4 are assembled with placeholder
// constants that ASM_PatchRowBytes overwrites with 1x..4x vidwidth.
// The colormap lookup is done by replacing the low byte of the table pointer
// (movb ...,%al then (%eax)), so it only equals base+index when the low byte
// of dc_colormap is zero.
.globl C(R_DrawColumn_8)
C(R_DrawColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vdone // nothing to scale (flags still come from the subl; movl leaves them alone)
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // low byte of the count, kept for the "& 3" tests below
shrl $2,%eax
movb %al,%ch // quad count (only the low 8 bits of count/4 are kept)
movl C(dc_colormap),%eax
testb $3,%dh
jz v4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vdone
//
// ebp : ystep frac. in the upper 16 bits (low 16 bits are zero)
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap pointer, al replaced by the texel
// esi : source texture column
// edi : dest screen
//
v4quadloop:
movb $0x7f,%dh // prep mask (not read by this loop; the masks below use the immediate)
// .align 4
vquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1: movb %dl,0x12345678(%edi) // row +1 (displacement patched to 1*vidwidth)
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2: movb %dl,2*0x12345678(%edi) // row +2 (patched to 2*vidwidth)
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3: movb %dl,3*0x12345678(%edi) // row +3 (patched to 3*vidwidth)
andb $0x7f,%bl
p4: addl $4*0x12345678,%edi // advance dest by four rows (patched to 4*vidwidth)
decb %ch
jnz vquadloop
vdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#ifdef HORIZONTALDRAW
// --------------------------------------------------------------------------
// Horizontal Column Drawer Optimisation
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawHColumn_8
//
// Variant of the column drawer that writes its pixels to consecutive bytes
// starting at yhlookup[dc_x] + hcolumnofs[dc_yl] instead of stepping by
// vidwidth. Texels come from dc_source (index masked to 0-127) and are
// translated through dc_colormap.
//
// NOTE(review): only pixelcount/4 groups of four pixels are drawn. The
// remaining (pixelcount & 3) pixels are never written: vhnearlydone falls
// straight through to vhdone. This looks unfinished - confirm before
// enabling HORIZONTALDRAW.
.globl C(R_DrawHColumn_8)
C(R_DrawHColumn_8):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
//
// dest = yhlookup[dc_x] + hcolumnofs[dc_yl];
//
movl C(dc_x),%ebx
movl C(yhlookup)(,%ebx,4),%edi
movl C(dc_yl),%ebp
movl %ebp,%ebx
addl C(hcolumnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vhdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // low byte of the count (overwritten below: dh doubles as a pixel temp)
shrl $2,%eax
movb %al,%ch // quad count
testb %ch, %ch
jz vhnearlydone
movl C(dc_colormap),%eax
decl %edi //----- pre-decrement: each pixel below does incl before its store
vhloop:
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi //----- (incl does not modify CF, so the carry from addl still reaches adcb)
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
// shll $16,%edx
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi //-----
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi)
// movl %edx,(%edi)
// addl $4,%edi
decb %ch
jnz vhloop
vhnearlydone:
// movl C(pixelcount)
vhdone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
// --------------------------------------------------------------------------
// Rotate a buffer 90 degrees clockwise after horizontal column draws
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_RotateBuffer
//
// As written, this routine assembles a single 32-bit word from four bytes
// (two read through esi, two through ebx, each pair 200 bytes apart) and
// stores it at the address held in dc_colormap. There is no loop.
//
// NOTE(review): ebx is dereferenced below without ever being loaded in this
// routine, so it still holds whatever the caller left in it. This looks like
// an unfinished experiment - confirm before enabling HORIZONTALDRAW.
.globl C(R_RotateBuffer)
C(R_RotateBuffer):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl C(dc_source),%esi // source pointer
movl C(dc_colormap),%edi // used here as the destination pointer
movb (%esi),%ah
addl $200,%esi
movb (%ebx),%al // NOTE(review): ebx has not been initialised at this point
addl $200,%ebx
bswap %eax // move the two bytes just read into the upper half of eax
movb (%esi),%ah
addl $200,%esi
movb (%ebx),%al
addl $200,%ebx
movl %eax,(%edi)
addl $4,%edi
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
#endif
//----------------------------------------------------------------------
//13-02-98:
// R_DrawSkyColumn : same as R_DrawColumn but:
//
// - wrap around at 256 texels instead of 128.
// this is needed because we have a higher texture for mouselook,
// we need at least 200 lines for the sky.
//
// NOTE: the sky should never wrap, so it could use a faster method.
// for the moment, we'll still use a wrapping method...
//
// IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawSkyColumn_8
//
// Same structure as R_DrawColumn_8: draws rows dc_yl..dc_yh of column dc_x
// from dc_source through dc_colormap, one vidwidth per row. The difference
// is the texture index: it is masked with 0xff once and never re-masked, so
// it wraps at 256 through 8-bit overflow of bl.
//
// Reads no stack arguments. The p1b..p4b instructions carry placeholders
// that ASM_PatchRowBytes overwrites with 1x..4x vidwidth.
.globl C(R_DrawSkyColumn_8)
C(R_DrawSkyColumn_8):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vskydone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x000000ff,%ebx // 8-bit index: 0-255
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // low byte of the count, for the "& 3" tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_colormap),%eax
testb $3,%dh
jz v4skyquadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vskydone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 8 bits, wraps naturally in bl
// ecx : ch = counter, cl = y step i.
// eax : colormap pointer, al replaced by the texel
// esi : source texture column
// edi : dest screen
//
v4skyquadloop:
// .align 4
vskyquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1b: movb %dl,0x12345678(%edi) // row +1 (patched to 1*vidwidth)
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2b: movb %dl,2*0x12345678(%edi) // row +2 (patched to 2*vidwidth)
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3b: movb %dl,3*0x12345678(%edi) // row +3 (patched to 3*vidwidth)
p4b: addl $4*0x12345678,%edi // advance dest by four rows (patched to 4*vidwidth)
decb %ch
jnz vskyquadloop
vskydone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpan
//
// Horizontal texture mapping
//
//----------------------------------------------------------------------
.data
// ystep/xstep hold the left-justified fractional steps written and read by
// R_DrawSpanNoWrap. texwidth is not referenced by the code visible in this
// part of the file.
ystep: .long 0
xstep: .long 0
C(texwidth): .long 64 // texture width
#if !defined( LINUX)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
// R_DrawSpan_8
//
// Draws one horizontal run of pixels on row ds_y from ds_x1 to ds_x2.
// Texels are read from ds_source, translated through ds_colormap and written
// to ylookup[ds_y] + columnofs[ds_x1] onwards, two pixels per loop pass.
//
// Texture coordinates are packed into one register: the upper 16 bits hold
// x and the lower 16 bits hold y, each as 6 integer bits over 10 fraction
// bits. "shrw $10 / roll $6 / andl $4095" turns that into the 12-bit index
// (y_int << 6) | x_int, i.e. the source is addressed as 64x64 texels.
//
// Self-modifying: on every call the packed step is stored into the immediates
// at hpatch1/hpatch2 and the source base into the displacements at
// hpatch3/hpatch4. That frees ebx for a second colormap pointer and esi for
// the loop counter.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
.globl C(R_DrawSpan_8)
C(R_DrawSpan_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
js hdone // negative count: nothing to do (a zero count falls through and draws nothing)
shrl $1,%eax // double pixel count
movl %eax,C(loopcount)
//
// build composite position
//
movl C(ds_xfrac),%ebp
shll $10,%ebp
andl $0x0ffff0000,%ebp
movl C(ds_yfrac),%eax
shrl $6,%eax
andl $0x0ffff,%eax
movl C(ds_y),%edi
orl %eax,%ebp
movl C(ds_source),%esi
//
// calculate screen dest
//
movl C(ylookup)(,%edi,4),%edi
movl C(ds_x1),%eax
addl C(columnofs)(,%eax,4),%edi
//
// build composite step
//
movl C(ds_xstep),%ebx
shll $10,%ebx
andl $0x0ffff0000,%ebx
movl C(ds_ystep),%eax
shrl $6,%eax
andl $0x0ffff,%eax
orl %eax,%ebx
//movl %eax,OFFSET hpatch1+2 // convice tasm to modify code...
movl %ebx,hpatch1+2
//movl %eax,OFFSET hpatch2+2 // convice tasm to modify code...
movl %ebx,hpatch2+2
movl %esi,hpatch3+2
movl %esi,hpatch4+2
// %eax colormap pointer, al = texel of the even pixel
// %ebx colormap pointer, bl = texel of the odd pixel (set just below)
// %ecx,%edx scratch
// %esi virtual source
// %edi moving destination pointer
// %ebp frac
movl C(ds_colormap),%eax
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
addl %ebx,%ebp // advance frac pointer
shrw $10,%cx
roll $6,%ecx
andl $4095,%ecx // finish calculation of the first pixel's index
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
addl %ebx,%ebp // advance frac pointer
andl $4095,%edx // finish calculation of the second pixel's index
movl %eax,%ebx
movb (%esi,%ecx),%al // get first pixel
movb (%esi,%edx),%bl // get second pixel
testl $0x0fffffffe,C(pixelcount)
movb (%eax),%dl // color translate first pixel (movb keeps the flags from testl)
// jnz hdoubleloop // at least two pixels to map
// jmp hchecklast
// movw $0xf0f0,%dx //see visplanes start
jz hchecklast
movb (%ebx),%dh // color translate second pixel
movl C(loopcount),%esi // esi becomes the pair counter; the source base lives in hpatch3/4
// .align 4
hdoubleloop:
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
shrw $10,%cx
roll $6,%ecx
hpatch1:
addl $0x012345678,%ebp // advance frac pointer
movw %dx,(%edi) // write first and second pixel together (dl, dh)
andl $4095,%ecx // finish calculation for third pixel
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
hpatch3:
movb 0x012345678(%ecx),%al // get third pixel
// movb %bl,1(%edi) // write second pixel
andl $4095,%edx // finish calculation for fourth pixel
hpatch2:
addl $0x012345678,%ebp // advance frac pointer
hpatch4:
movb 0x012345678(%edx),%bl // get fourth pixel
movb (%eax),%dl // color translate third pixel
addl $2,%edi // advance to third pixel destination
decl %esi // done with loop?
movb (%ebx),%dh // color translate fourth pixel (movb keeps the flags from decl)
jnz hdoubleloop
// check for final pixel
hchecklast:
testl $1,C(pixelcount)
jz hdone
movb %dl,(%edi) // write final pixel
hdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
//----------------------------------------------------------------------
// R_DrawTransColumn
//
// Vertical column texture drawer, with transparency. Replaces Doom2's
// 'fuzz' effect, which was not so beautiful.
// Transparency is always impressive in some way, don't know why...
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawTranslucentColumn_8
//
// Draws rows dc_yl..dc_yh of column dc_x, blending each source texel with
// the pixel already on screen:
//   dest = dc_colormap[ dc_transmap[(texel << 8) | dest] ]
// The texture index is masked to 0-127 after every step.
//
// Both lookups replace low bytes of a table pointer (ah/al of eax, dl of
// edx), so they only equal base+index when the low 16 bits of dc_transmap
// and the low 8 bits of dc_colormap are zero.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
// p5 and p6..p9 carry placeholders that ASM_PatchRowBytes overwrites with
// 1x and 2x vidwidth respectively.
//
// Fix: on entry to the quad loop the first background byte used to be read
// through p5 from dest + vidwidth while the blended result was written to
// dest, so that pixel was blended against the row below it. The pointer
// set-up now runs before p5, making the patched displacement cancel the
// "dest - vidwidth" bias so the byte is read from dest itself. The p5
// instruction and its patch offset are unchanged.
.globl C(R_DrawTranslucentColumn_8)
C(R_DrawTranslucentColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vtdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
// Merge: ecx = y frac (upper 16 bits, from edx) | cx (integer step in cl).
// This frees edx to hold the colormap pointer.
pushw %cx
movl %edx,%ecx
popw %cx
movl C(dc_colormap),%edx
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
shrl $2,%eax
testb $0x03,C(pixelcount)
movb %al,%ch // quad count (movb keeps the flags from testb)
movl C(dc_transmap),%eax
jz vt4quadloop
//
// do un-even pixel
//
testb $1,C(pixelcount)
jz 2f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,C(pixelcount)
jz 3f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vtdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : colormap pointer, dl = tmp pixel to write
// ebx : y i. lower 7 bits, masked for index
// ecx : y frac. upper 16 bits
// ecx : ch = counter, cl = y step i.
// eax : transmap pointer (low 16 bits replaced by ah:al)
// ah : foreground texel (from the texture)
// al : background pixel (from the screen buffer)
// esi : source texture column
// ebp,edi : dest screen, used alternately, each stepping 2 rows
//
vt4quadloop:
movb (%esi,%ebx),%ah // fetch texel : colormap number
movl %ebp,C(tystep) // spill the fractional step; ebp becomes a dest pointer
movl %edi,%ebp // ebp = first row to write
subl C(vidwidth),%edi // bias edi one row up; p7 below adds two rows
p5: movb 0x12345678(%edi),%al // fetch dest : (edi - vidwidth) + patched vidwidth = first row
jmp inloop
// .align 4
vtquadloop:
addl C(tystep),%ecx
adcb %cl,%bl
p6: addl $2*0x12345678,%ebp
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
inloop:
addl C(tystep),%ecx
adcb %cl,%bl
p7: addl $2*0x12345678,%edi
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
addl C(tystep),%ecx
adcb %cl,%bl
p8: addl $2*0x12345678,%ebp
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
addl C(tystep),%ecx
adcb %cl,%bl
p9: addl $2*0x12345678,%edi
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
decb %ch
jnz vtquadloop
vtdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif // ifdef USEASM
//----------------------------------------------------------------------
// R_DrawShadeColumn
//
// for smoke..etc.. test.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawShadeColumn_8
//
// Draws rows dc_yl..dc_yh of column dc_x, replacing each screen pixel with
//   dest = colormaps[(texel << 8) | dest]
// i.e. the source texel selects which 256-byte table of colormaps is applied
// to the pixel already on screen. The texture index is masked to 0-127.
//
// The lookup replaces ah/al of the table pointer, so it only equals
// base+index when the low 16 bits of colormaps are zero.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
// sh5 and sh6..sh9 carry placeholders that ASM_PatchRowBytes overwrites with
// 1x and 2x vidwidth respectively.
//
// Fix: on entry to the quad loop the first screen byte used to be read
// through sh5 from dest + vidwidth while the result was written to dest, so
// that pixel was shaded from the row below it. The pointer set-up now runs
// before sh5, making the patched displacement cancel the "dest - vidwidth"
// bias so the byte is read from dest itself. The sh5 instruction and its
// patch offset are unchanged.
.globl C(R_DrawShadeColumn_8)
C(R_DrawShadeColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle shdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // low byte of the count, for the "& 3" tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(colormaps),%eax
testb $0x03,%dh
jz sh4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz shdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dh = 0x7f mask, dl = tmp pixel
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormaps pointer, ah = texel, al = screen pixel
// esi : source texture column
// ebp,edi : dest screen, used alternately, each stepping 2 rows
//
sh4quadloop:
movb $0x7f,%dh // prep mask
movb (%esi,%ebx),%ah // fetch texel : colormap number
movl %ebp,C(tystep) // spill the fractional step; ebp becomes a dest pointer
movl %edi,%ebp // ebp = first row to write
subl C(vidwidth),%edi // bias edi one row up; sh7 below adds two rows
sh5: movb 0x12345678(%edi),%al // fetch dest : (edi - vidwidth) + patched vidwidth = first row
jmp shinloop
// .align 4
shquadloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh6: addl $2*0x12345678,%ebp
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
shinloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh7: addl $2*0x12345678,%edi
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh8: addl $2*0x12345678,%ebp
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh9: addl $2*0x12345678,%edi
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
decb %ch
jnz shquadloop
shdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawWaterColumn : basically it's just a copy of R_DrawColumn,
// but it uses dc_colormap from dc_yl to dc_yw-1
// then it uses dc_wcolormap from dc_yw to dc_yh
//
// Thus, the 'underwater' part of the walls is remapped to 'water-like'
// colors.
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
// R_DrawWaterColumn
//
// Instruction-for-instruction the same as R_DrawColumn_8, except that
// texels are translated through dc_wcolormap and the patch labels are
// w1..w4. Draws rows dc_yl..dc_yh of column dc_x from dc_source, with the
// texture index masked to 0-127.
//
// NOTE(review): the code below reads only dc_wcolormap for the whole run;
// it never reads dc_colormap or a dc_yw split row.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
.globl C(R_DrawWaterColumn)
C(R_DrawWaterColumn):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle wdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // low byte of the count, for the "& 3" tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_wcolormap),%eax
testb $3,%dh
jz w4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz wdone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap pointer, al replaced by the texel
// esi : source texture column
// edi : dest screen
//
w4quadloop:
movb $0x7f,%dh // prep mask (not read by this loop; the masks below use the immediate)
// .align 4
wquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w1: movb %dl,0x12345678(%edi) // row +1 (patched to 1*vidwidth)
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w2: movb %dl,2*0x12345678(%edi) // row +2 (patched to 2*vidwidth)
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w3: movb %dl,3*0x12345678(%edi) // row +3 (patched to 3*vidwidth)
andb $0x7f,%bl
w4: addl $4*0x12345678,%edi // advance dest by four rows (patched to 4*vidwidth)
decb %ch
jnz wquadloop
wdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpanNoWrap
//
// Horizontal texture mapping, does not remap colors,
// neither needs to wrap around the source texture.
//
// Thus, a special optimisation can be used...
//
//----------------------------------------------------------------------
.data
// advancetable[0] = source advance when the y fraction carries,
// advancetable[1] = source advance when it does not (see the set-up below).
advancetable: .long 0, 0
#if !defined( LINUX)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
// R_DrawSpanNoWrap
//
// Copies ds_x2 - ds_x1 + 1 texels from ds_source to consecutive bytes
// starting at the address held in ds_y, stepping through the source by
// (ds_xstep, ds_ystep) in 16.16 fixed point. Texels are written unchanged:
// the colormap lookups are commented out and the source offset is never
// masked, so nothing wraps.
//
// The source row pitch used for the y direction is vidwidth (the existing
// comments call it "texwidth").
//
// Per pixel: "addl ystep / sbbl ecx,ecx" turns the y-fraction carry into
// 0 or -1, which selects advancetable[1] or advancetable[0]; the following
// "addl xstep / adcl" adds that entry plus the x-fraction carry to the
// source offset.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
.globl C(R_DrawSpanNoWrap)
C(R_DrawSpanNoWrap):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle htvdone // nothing to scale
// shrl $1,%eax // double pixel count
// movl %eax,C(loopcount)
//
// calculate screen dest
//
movl C(ds_y),%edi //full destination start address
//
// set up advancetable
//
movl C(ds_xstep),%ebp
movl C(ds_ystep),%ecx
movl %ecx,%eax
movl %ebp,%edx
sarl $16,%edx // xstep >>= 16;
movl C(vidwidth),%ebx
sarl $16,%eax // ystep >>= 16;
jz 0f // integer y step is zero: skip the multiply
imull %ebx,%eax // (ystep >> 16) * texwidth;
0:
addl %edx,%eax // add in xstep
// (ystep >> 16) * texwidth + (xstep >> 16);
movl %eax,advancetable+4 // advance base in y
addl %ebx,%eax // ((ystep >> 16) + 1) * texwidth +
// (xstep >> 16);
movl %eax,advancetable // advance extra in y
shll $16,%ebp // left-justify xstep fractional part
movl %ebp,xstep
shll $16,%ecx // left-justify ystep fractional part
movl %ecx,ystep
//
// calculate the texture starting address
//
movl C(ds_source),%esi // texture source
movl C(ds_yfrac),%eax
movl %eax,%edx
sarl $16,%eax
movl C(ds_xfrac),%ecx
imull %ebx,%eax // (yfrac >> 16) * texwidth
movl %ecx,%ebx
sarl $16,%ecx
movl %ecx,%ebp
addl %eax,%ebp // source = (xfrac >> 16) +
// ((yfrac >> 16) * texwidth);
//
// esi : texture source
// edi : screen dest
// eax : loaded with ds_colormap, but only al is used (as the texel temp)
// ebx : xfrac << 16
// ecx : used in loop, contains either 0 or -1, *4, offset into advancetable
// edx : yfrac << 16, dh = quad counter
// ebp : offset into texture
//
shll $16,%edx // yfrac upper word, lower byte will be used
movl C(ds_colormap),%eax
shll $16,%ebx // xfrac upper word, lower unused
movl C(pixelcount),%ecx
shrl $2,%ecx
movb %cl,%dh // quad pixels count
movl C(pixelcount),%ecx
andl $3,%ecx
jz htvquadloop // pixelcount is multiple of 4
decl %ecx
jz 1f // one leftover pixel
decl %ecx
jz 2f // two leftover pixels
//
// do one to three pixels first
// (falling through here handles three; labels 2 and 1 enter part-way)
//
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
2:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
1:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
//
// test if there was at least 4 pixels
//
testb $0xFF,%dh
jz htvdone
//
// four pixels per loop
// U
// V
htvquadloop:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,1(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,2(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,3(%edi)
addl $4, %edi
incl %ecx //dummy
decb %dh
jnz htvquadloop // pairs in the V pipe
htvdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
#ifdef HORIZONTALDRAW
// void R_RotateBufferasm (void)
//
// Repacks a byte buffer: reads from the address in dc_source and writes to
// the address in dc_colormap. Each inner pass gathers 8 source bytes spaced
// 200 bytes apart (esi, esi-200, ... esi-7*200) and stores them as 8
// consecutive destination bytes; 40 inner passes walk back 320*200 source
// bytes, after which esi is moved forward by 320*200+1 for the next of the
// 200 outer passes.
//
// Reads no stack arguments. ebp, esi, edi and ebx are preserved.
//
// Fix: the epilogue used to pop in the same order as the pushes
// (ebp, esi, edi, ebx), so every callee-saved register was returned holding
// another register's saved value. The pops are now in reverse push order.
#ifdef LINUX
.align 2
#else
.align 4
#endif
.globl C(R_RotateBufferasm)
C(R_RotateBufferasm):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl C(dc_source),%esi // source pointer
movl C(dc_colormap),%edi // used here as the destination pointer
movl $200,%edx // outer pass count
ra2:
movl $40,%ecx // inner pass count: 40 * 8 = 320 bytes written per outer pass
ra:
movb -2*200(%esi),%al
movb -6*200(%esi),%bl
movb -3*200(%esi),%ah
movb -7*200(%esi),%bh
shll $16,%eax // bytes 2,3 of the first dword are in place
shll $16,%ebx // bytes 2,3 of the second dword are in place
movb (%esi),%al
movb -4*200(%esi),%bl
movb -1*200(%esi),%ah
movb -5*200(%esi),%bh
movl %eax,(%edi)
subl $8*200,%esi
movl %ebx,4(%edi)
addl $8,%edi
decl %ecx
jnz ra
addl $320*200+1,%esi // undo the 40 inner steps and move one byte forward
// addl 320-32,%edi
decl %edx
jnz ra2
popl %ebx // restore register variables (reverse of the push order)
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2021 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap_asm.s
/// \brief ???
//.comm _dc_colormap,4
//.comm _dc_x,4
//.comm _dc_yl,4
//.comm _dc_yh,4
//.comm _dc_iscale,4
//.comm _dc_texturemid,4
//.comm _dc_source,4
//.comm _ylookup,4
//.comm _columnofs,4
//.comm _loopcount,4
//.comm _pixelcount,4
.data
// _pixelcount and _loopcount are not referenced by the routines visible in
// this part of the file.
_pixelcount:
.long 0x00000000
_loopcount:
.long 0x00000000
.align 8
// Written by _R_DrawColumn_8_K6_MMX with the number of bytes it subtracts
// from esp to align the stack.
_mmxcomm:
.long 0x00000000
.text
.align 4
// _R_DrawColumn8_NOMMX
//
// Plain column drawer, one pixel per loop pass. Draws rows _dc_yl.._dc_yh of
// column _dc_x: texel = _dc_source[(frac >> 16) & 127], pixel =
// _dc_colormap[texel], with frac starting at
// _dc_texturemid + _dc_yl * _dc_iscale and advancing by _dc_iscale per row.
//
// _ylookup and _columnofs are loaded as pointers and then indexed.
//
// The two 0x12345678 immediates (the 4 bytes just before the exported labels
// rdc8nwidth1 and rdc8nwidth2) are placeholders for the per-row destination
// step: it is subtracted once before the loop and added at the top of every
// pass.
// NOTE(review): the code that patches them is not in this chunk - confirm.
//
// Reads no stack arguments; ebp, esi, edi and ebx are saved and restored.
.globl _R_DrawColumn8_NOMMX
_R_DrawColumn8_NOMMX:
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl _dc_yl,%edx
movl _dc_yh,%eax
subl %edx,%eax
leal 1(%eax),%ebx // ebx = pixel count = yh - yl + 1
testl %ebx,%ebx
jle rdc8ndone
movl _dc_x,%eax
movl _ylookup, %edi
movl (%edi,%edx,4),%esi
movl _columnofs, %edi
addl (%edi,%eax,4),%esi // esi = dest
movl _dc_iscale,%edi // edi = fracstep
movl %edx,%eax
imull %edi,%eax
movl _dc_texturemid,%ecx
addl %eax,%ecx // ecx = frac
movl _dc_source,%ebp
xorl %edx, %edx // upper bytes of edx stay zero; dl receives the texel
subl $0x12345678, %esi // pre-bias dest by one step; the loop adds it back first
.globl rdc8nwidth1
rdc8nwidth1:
.align 4,0x90
rdc8nloop:
movl %ecx,%eax
shrl $16,%eax
addl %edi,%ecx
andl $127,%eax
addl $0x12345678,%esi
.globl rdc8nwidth2
rdc8nwidth2:
movb (%eax,%ebp),%dl
movl _dc_colormap,%eax
movb (%eax,%edx),%al
movb %al,(%esi)
decl %ebx
jne rdc8nloop
rdc8ndone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
//
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
// By ES 1998/08/01
//
// _R_DrawColumn_8_Pentium
//
// Column drawer that processes two pixels per loop pass. Draws rows
// _dc_yl.._dc_yh of column _dc_x from _dc_source through _dc_colormap, with
// frac starting at _dc_texturemid + _dc_yl * _dc_iscale.
//
// A single-pixel column is handled separately. Otherwise frac and fracstep
// are shifted left by 9 so that "shrl $25" yields the 7-bit texture index
// directly. Addressing is relative to the bottom row: edi is a fixed base
// near ylookup[dc_yh] and ebx is a negative offset that counts up to zero.
//
// Each exported rdc8pwidthN label directly follows an instruction whose last
// four bytes are a 0x12345678 placeholder; the "DeadBeef = ..." comments
// state the multiple of SCREENWIDTH each one is meant to receive.
// NOTE(review): the code that patches them is not in this chunk - confirm.
//
// The colormap lookups replace the low byte of the table pointer, so they
// only equal base+index when the low byte of _dc_colormap is zero.
//
// Reads no stack arguments; ebp, ebx, esi and edi are saved and restored.
.globl _R_DrawColumn_8_Pentium
_R_DrawColumn_8_Pentium:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl _dc_yl,%eax // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
movl (%edi,%ebx,4),%ecx // ecx = ylookup[dc_yh], start of the bottom row
subl %eax,%ebx // ebx=number of pixels-1
jl rdc8pdone // no pixel to draw, done
jnz rdc8pmany
movl _dc_x,%edx // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%eax
movl _dc_texturemid,%edi
addl %eax,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8pdone // done!
.align 4, 0x90
rdc8pmany: // draw >1 pixel
movl _dc_x,%edx
movl _columnofs, %edi
movl (%edi,%edx,4),%edx
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
.globl rdc8pwidth5
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%edx // edx = fracstep
imull %edx,%eax
shll $9, %edx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%ecx
addl %eax,%ecx // ecx = frac
movl _dc_colormap,%eax // eax = lighting/special effects LUT
shll $9, %ecx
movl _dc_source,%esi // esi = source ptr
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8pwidth6
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
// Begin the calculation of the two first pixels
leal (%ecx, %edx), %ebp
shrl $25, %ecx
movb (%esi, %ecx), %al
leal (%edx, %ebp), %ecx
shrl $25, %ebp
// NOTE(review): dl is the low byte of the step in edx, so from here on the
// step's lowest 8 bits hold the last looked-up pixel; this only perturbs the
// least significant fraction bits.
movb (%eax), %dl
// The main loop
rdc8ploop:
movb (%esi,%ebp), %al // load 1
leal (%ecx, %edx), %ebp // calc frac 3
shrl $25, %ecx // shift frac 2
movb %dl, 0x12345678(%edi, %ebx)// store 0
.globl rdc8pwidth1
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
movb (%eax), %al // lookup 1
movb %al, 0x12345678(%edi, %ebx)// store 1
.globl rdc8pwidth2
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
movb (%esi, %ecx), %al // load 2
leal (%ebp, %edx), %ecx // calc frac 4
shrl $25, %ebp // shift frac 3
movb (%eax), %dl // lookup 2
addl $0x12345678, %ebx // counter
.globl rdc8pwidth3
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
jl rdc8ploop // loop while the offset is still negative
// End of loop. Write extra pixel or just exit.
jnz rdc8pdone // offset went past zero: the pixel count was even, nothing left
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
.globl rdc8pwidth4
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8pdone:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
//
// MMX asm version, optimised for K6
// By ES 1998/07/05
//
// Draws one vertical 8-bit column using the same globals as the other
// column drawers (_dc_yl, _dc_yh, _dc_x, _dc_iscale, _dc_texturemid,
// _dc_source, _dc_colormap, _ylookup, _columnofs). Saves and restores
// ebp, ebx, esi and edi and executes emms before returning.
//
// An 8-byte-aligned scratch slot is carved out below esp; the size of that
// adjustment is stored in the global _mmxcomm and added back on exit.
//
// The texture index is reduced to 7 bits (andl $127 in the one-pixel path,
// shll $9 / shift-right-by-25 in the loop), i.e. it wraps at 128.
//
// The one-pixel path loads the texel into the low byte of the register
// holding _dc_colormap, so it relies on the low 8 bits of _dc_colormap
// being zero; the main loop indexes the colormap with (%edx,%reg) instead.
//
// 0x12345678 operands are placeholders followed by exported labels
// (rdc8mwidthN, rdc8moffsN). NOTE(review): presumably fixed up at run time
// by code outside this view - confirm.
//
.globl _R_DrawColumn_8_K6_MMX
_R_DrawColumn_8_K6_MMX:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
andl $7,%eax
addl $8,%eax
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
subl %eax,%esp
movl _dc_yl,%edx // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
// ecx = ylookup[dc_yh]
movl (%edi,%ebx,4),%ecx
subl %edx,%ebx // ebx=number of pixels-1
// placeholder branch target; see rdc8moffs1
jl 0x12345678 // no pixel to draw, done
.globl rdc8moffs1
rdc8moffs1:
jnz rdc8mmany
movl _dc_x,%eax // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
// edx = dc_yl * dc_iscale
imull %esi,%edx
movl _dc_texturemid,%edi
addl %edx,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
// texel goes into the low byte of the colormap pointer held in edx
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8mdone // done!
.globl rdc8moffs2
rdc8moffs2:
.align 4, 0x90
rdc8mmany: // draw >1 pixel
movl _dc_x,%eax
movl _columnofs, %edi
movl (%edi,%eax,4),%eax
// esi = columnofs[dc_x] + ylookup[dc_yh] + placeholder displacement
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
.globl rdc8mwidth3
rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%ecx // ecx = fracstep
imull %ecx,%edx
// Pre-shift step and frac left by 9 so that a right shift by 25 yields
// bits 16..22 of the original value (the 7-bit texture index).
shll $9, %ecx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%eax
addl %edx,%eax // eax = frac
movl _dc_colormap,%edx // edx = lighting/special effects LUT
shll $9, %eax
// edi = 2 * fracstep, written to both halves of the scratch qword
leal (%ecx, %ecx), %edi
movl _dc_source,%ebp // ebp = source ptr
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8mwidth5
rdc8mwidth5: // DeadBeef = -SCREENWIDTH
movl %edi, 4(%esp)
// edi = frac + fracstep (second pixel's frac)
leal (%eax, %ecx), %edi
movq 0(%esp), %mm1 // fracstep:fracstep in mm1
movl %eax, 0(%esp)
shrl $25, %eax
movl %edi, 4(%esp)
// eax = texel of the first pixel
movzbl (%ebp, %eax), %eax
movq 0(%esp), %mm0 // frac:frac in mm0
paddd %mm1, %mm0
shrl $25, %edi
movq %mm0, %mm2
psrld $25, %mm2 // texture index in mm2
paddd %mm1, %mm0
// scratch qword now holds the texture indices of the next pixel pair
movq %mm2, 0(%esp)
.globl rdc8mloop
rdc8mloop: // The main loop
movq %mm0, %mm2 // move 4-5 to temp reg
movzbl (%ebp, %edi), %edi // read 1
psrld $25, %mm2 // shift 4-5
movb (%edx,%eax), %cl // lookup 0
movl 0(%esp), %eax // load 2
// The flags from this add are consumed by jl/jnz below; the intervening
// mov/movzbl/MMX instructions do not modify them.
addl $0x12345678, %ebx // counter
.globl rdc8mwidth2
rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
movb %cl, (%esi, %ebx) // write 0
movb (%edx,%edi), %ch // lookup 1
movb %ch, 0x12345678(%esi, %ebx) // write 1
.globl rdc8mwidth1
rdc8mwidth1: // DeadBeef = SCREENWIDTH
movl 4(%esp), %edi // load 3
paddd %mm1, %mm0 // frac 6-7
movzbl (%ebp, %eax), %eax // lookup 2
movq %mm2, 0(%esp) // store texture index 4-5
jl rdc8mloop
jnz rdc8mno_odd
movb (%edx,%eax), %cl // write the last odd pixel
movb %cl, 0x12345678(%esi)
.globl rdc8mwidth4
rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8mno_odd:
.globl rdc8mdone
rdc8mdone:
// Leave MMX state and undo the stack adjustment saved in _mmxcomm
emms
addl _mmxcomm, %esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// Need some extra space to align run-time
// (31 one-byte nops follow the exported end label)
.globl R_DrawColumn_8_K6_MMX_end
R_DrawColumn_8_K6_MMX_end:
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DOSDOOM.
;; Copyright (C) 2010-2021 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_mmx.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode, using SIMD
;; instructions.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 255
;; cextern / cglobal: declare a C-visible symbol as extern / global.
;; When LINUX is defined the name is used unchanged; otherwise the bare
;; name is first %define'd to its underscore-prefixed form, so later uses
;; of the bare name in this file refer to the _name symbol.
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
; Layout as declared here: 12 bytes of padding, a 4-byte .width field at
; offset 12, then 44 more bytes (60 bytes total).
; NOTE(review): must stay in sync with the C-side definition - confirm.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_colormap
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern dc_iscale
cextern dc_texturemid
cextern dc_texheight
cextern dc_source
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_transmap
cextern R_DrawColumn_8_ASM
cextern R_Draw2sMultiPatchColumn_8_ASM
;; spans
cextern nflatshiftup
cextern nflatxshift
cextern nflatyshift
cextern nflatmask
cextern ds_xfrac
cextern ds_yfrac
cextern ds_xstep
cextern ds_ystep
cextern ds_x1
cextern ds_x2
cextern ds_y
cextern ds_source
cextern ds_colormap
cextern ylookup
cextern columnofs
cextern vid
[SECTION .data]
;; 64-bit scratch: R_DrawSpan_8_MMX copies nflatmask into both dwords and
;; uses it as the memory operand of pand.
nflatmask64 dq 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; MMX column drawer.
;;
;; Takes no stack arguments; all inputs come from the dc_* globals,
;; centeryfrac, ylookup, columnofs and vid. Preserves ebp, esi, edi, ebx
;; and executes emms before returning. If dc_texheight is not a power of
;; two the work is handed to R_DrawColumn_8_ASM instead.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_MMX
R_DrawColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
;; (height - 1) & height is non-zero unless height is a power of two
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
;; one-operand imul: 64-bit product in edx:eax (edx is overwritten,
;; which is why heightmask was copied to ebp above)
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;; An odd count is made even by drawing one pixel with scalar code.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
movzx edx, byte [ebx + edx]
mov [edi], dl
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
;; ecx is even and non-zero here, so "sub ecx, 2 / jnz" terminates exactly.
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
mov [edi], al ;; 2B 1(2)u. First pixel
;; IFETCH boundary
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
;; IFETCH boundary
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
;; Non-power-of-two height: delegate, then share the common epilogue.
.usenonMMX:
call R_DrawColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; MMX column drawer.
;;
;; Takes no stack arguments; all inputs come from the dc_* globals,
;; centeryfrac, ylookup, columnofs and vid. A texel equal to
;; TRANSPARENTPIXEL is skipped (no colormap lookup, no store) while the
;; destination pointer still advances. Preserves ebp, esi, edi, ebx and
;; executes emms before returning. If dc_texheight is not a power of two
;; the work is handed to R_Draw2sMultiPatchColumn_8_ASM instead.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_MMX
R_Draw2sMultiPatchColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
;; (height - 1) & height is non-zero unless height is a power of two
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
;; one-operand imul: 64-bit product in edx:eax (edx is overwritten,
;; which is why heightmask was copied to ebp above)
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;; An odd count is made even by handling one pixel with scalar code.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
;; transparent texel: leave the destination byte untouched
cmp dl, TRANSPARENTPIXEL
je .nextmod2
movzx edx, byte [ebx + edx]
mov [edi], dl
.nextmod2:
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
;; ecx is even and non-zero here, so "sub ecx, 2 / jnz" terminates exactly.
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
cmp al, TRANSPARENTPIXEL ;; 2B 1u.
je .secondinpair ;; 2B 1u.
;; IFETCH boundary
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
mov [edi], al ;; 2B 1(2)u. First pixel
.secondinpair:
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
je .nextpair ;; 2B 1u.
;; IFETCH boundary
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
.nextpair:
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
;; Non-power-of-two height: delegate, then share the common epilogue.
.usenonMMX:
call R_Draw2sMultiPatchColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_DrawSpan : 8bpp span drawer
;;
;; MMX span drawer.
;;
;; Takes no stack arguments; inputs come from the ds_* and nflat* globals,
;; ylookup and columnofs. Pixels are written one at a time until the
;; destination is dword-aligned, then four at a time as a single dword
;; store, then one at a time for the remainder. Writes the global
;; nflatmask64. Preserves ebp, esi, edi, ebx and executes emms before
;; returning.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = four packed output pixels (aligned loop)
;; mm0 = accumulator
;; mm1 = xposition
;; mm2 = yposition
;; mm3 = xstep (2 * xstep inside the aligned loop)
;; mm4 = ystep (2 * ystep inside the aligned loop)
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;; mm7 = accumulator
;;----------------------------------------------------------------------
cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; esi = ds_source
;; ebx = ds_colormap
;;
mov esi, [ds_source]
mov ebx, [ds_colormap]
;;
;; edi = ylookup[ds_y] + columnofs[ds_x1]
;;
mov eax, [ds_y]
mov edi, [ylookup + eax*4]
mov edx, [ds_x1]
add edi, [columnofs + edx*4]
;;
;; ecx = ds_x2 - ds_x1 + 1
;;
mov ecx, [ds_x2]
sub ecx, edx
add ecx, 1
;;
;; Needed for fracs and steps
;; (mm7 = nflatshiftup, used as the shift count below)
;;
movd mm7, [nflatshiftup]
;;
;; mm3 = xstep
;;
movd mm3, [ds_xstep]
pslld mm3, mm7
punpckldq mm3, mm3
;;
;; mm4 = ystep
;;
movd mm4, [ds_ystep]
pslld mm4, mm7
punpckldq mm4, mm4
;;
;; mm1 = pair of consecutive xpositions
;;
movd mm1, [ds_xfrac]
pslld mm1, mm7
movq mm6, mm1
paddd mm6, mm3
punpckldq mm1, mm6
;;
;; mm2 = pair of consecutive ypositions
;;
movd mm2, [ds_yfrac]
pslld mm2, mm7
movq mm6, mm2
paddd mm6, mm4
punpckldq mm2, mm6
;;
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;;
movd mm5, [nflatxshift]
movd mm6, [nflatyshift]
;;
;; Mask is in memory due to lack of registers.
;;
mov eax, [nflatmask]
mov [nflatmask64], eax
mov [nflatmask64 + 4], eax
;;
;; Go until we reach a dword boundary.
;;
.unaligned:
test edi, 3
jz .alignedprep
;; Single-pixel path; also entered from .prestragglers for the tail.
;; NOTE(review): the exit test is equality with zero only, so a negative
;; count would not stop here - confirm callers never pass ds_x2 < ds_x1 - 1.
.stragglers:
cmp ecx, 0
je .done ;; If ecx == 0, we're finished.
;;
;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep (once!)
paddd mm2, mm4 ;; yposition += ystep (once!)
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
mov [edi], al
add edi, 1
sub ecx, 1
jmp .unaligned
.alignedprep:
;;
;; We can double the steps now.
;;
pslld mm3, 1
pslld mm4, 1
;;
;; Generate chunks of four pixels.
;;
.alignedloop:
;;
;; Make sure we have at least four pixels.
;;
cmp ecx, 4
jl .prestragglers
;;
;; First two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx ebp, byte [ebx + eax] ;; ebp = colormap[al]
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of second pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 8 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Next two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of third pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 16 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of fourth pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 24 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Write pixels.
;;
mov [edi], ebp
add edi, 4
sub ecx, 4
jmp .alignedloop
.prestragglers:
;;
;; Back to one step at a time.
;; (arithmetic right shift by one, undoing the pslld in .alignedprep)
;;
psrad mm3, 1
psrad mm4, 1
jmp .stragglers
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2021 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_vc.nas
;; DESCRIPTION:
;; Assembler optimised math code for Visual C++.
[BITS 32]
;; cglobal: export a symbol under its underscore-prefixed name. The bare
;; name is %define'd to _name first, so the label written after the macro
;; call is emitted as _name.
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
[SECTION .text write]
;----------------------------------------------------------------------------
;fixed_t FixedMul (fixed_t a, fixed_t b)
;
; Reads a from [esp+4] and b from [esp+8], forms the signed 64-bit product
; in edx:eax and returns bits 16..47 of it in eax (i.e. (a*b) >> 16).
; Clobbers edx; no registers are saved.
;----------------------------------------------------------------------------
cglobal FixedMul
; align 16
FixedMul:
mov eax,[esp+4]
imul dword [esp+8]
shrd eax,edx,16
ret
;----------------------------------------------------------------------------
;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;
; Reads a from [esp+4] and b from [esp+8], builds the sign-extended 64-bit
; value a << 16 in edx:eax and divides it by b with idiv; the quotient is
; returned in eax. Clobbers edx.
; No check is made here for b == 0 or for a quotient that does not fit in
; 32 bits. NOTE(review): presumably the caller guards against both - confirm.
;----------------------------------------------------------------------------
cglobal FixedDiv2
; align 16
FixedDiv2:
mov eax,[esp+4]
mov edx,eax ;; these two instructions allow the next
sar edx,31 ;; two to pair, on the Pentium processor.
shld edx,eax,16
sal eax,16
idiv dword [esp+8]
ret
......@@ -82,6 +82,14 @@
#define O_BINARY 0
#endif
/* File-local locking helpers. With HAVE_THREADS, Lock_state() and
 * Unlock_state() call I_lock_mutex / I_unlock_mutex on wad_mutex (the lock
 * call takes the mutex's address, the unlock call takes it by value).
 * Without HAVE_THREADS both macros expand to nothing. */
#ifdef HAVE_THREADS
static I_mutex wad_mutex;
# define Lock_state() I_lock_mutex(&wad_mutex)
# define Unlock_state() I_unlock_mutex(wad_mutex)
#else
# define Lock_state()
# define Unlock_state()
#endif
typedef struct
{
......@@ -1922,6 +1930,8 @@ void *W_CacheLumpNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
if (!TestValidLump(wad,lump))
return NULL;
Lock_state();
lumpcache = wadfiles[wad]->lumpcache;
if (!lumpcache[lump])
{
......@@ -1931,6 +1941,8 @@ void *W_CacheLumpNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
else
Z_ChangeTag(lumpcache[lump], tag);
Unlock_state();
return lumpcache[lump];
}
......@@ -1955,9 +1967,13 @@ void *W_CacheLumpNumForce(lumpnum_t lumpnum, INT32 tag)
if (!TestValidLump(wad,lump))
return NULL;
Lock_state();
ptr = Z_Malloc(W_LumpLengthPwad(wad, lump), tag, NULL);
W_ReadLumpHeaderPwad(wad, lump, ptr, 0, 0); // read the lump in full
Unlock_state();
return ptr;
}
......@@ -1975,15 +1991,23 @@ static inline boolean W_IsLumpCachedPWAD(UINT16 wad, UINT16 lump, void *ptr)
if (!TestValidLump(wad, lump))
return false;
Lock_state();
lcache = wadfiles[wad]->lumpcache[lump];
if (ptr)
{
if (ptr == lcache)
if (ptr == lcache) {
Unlock_state();
return true;
}
}
else if (lcache)
else if (lcache) {
Unlock_state();
return true;
}
Unlock_state();
return false;
}
......@@ -2007,15 +2031,23 @@ static inline boolean W_IsPatchCachedPWAD(UINT16 wad, UINT16 lump, void *ptr)
if (!TestValidLump(wad, lump))
return false;
Lock_state();
lcache = wadfiles[wad]->patchcache[lump];
if (ptr)
{
if (ptr == lcache)
if (ptr == lcache) {
Unlock_state();
return true;
}
}
else if (lcache)
else if (lcache) {
Unlock_state();
return true;
}
Unlock_state();
return false;
}
......@@ -2048,7 +2080,7 @@ void *W_CacheLumpName(const char *name, INT32 tag)
// Cache a patch into heap memory, convert the patch format as necessary
//
void *W_CacheSoftwarePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
static void *W_CacheSoftwarePatch(UINT16 wad, UINT16 lump, INT32 tag)
{
lumpcache_t *lumpcache = NULL;
......@@ -2082,11 +2114,6 @@ void *W_CacheSoftwarePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
return lumpcache[lump];
}
void *W_CacheSoftwarePatchNum(lumpnum_t lumpnum, INT32 tag)
{
return W_CacheSoftwarePatchNumPwad(WADFILENUM(lumpnum),LUMPNUM(lumpnum),tag);
}
void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
{
patch_t *patch;
......@@ -2094,16 +2121,23 @@ void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
if (!TestValidLump(wad, lump))
return NULL;
patch = W_CacheSoftwarePatchNumPwad(wad, lump, tag);
Lock_state();
patch = W_CacheSoftwarePatch(wad, lump, tag);
#ifdef HWRENDER
// Software-only compile cache the data without conversion
if (rendermode == render_soft || rendermode == render_none)
#endif
{
Unlock_state();
return (void *)patch;
}
#ifdef HWRENDER
Patch_CreateGL(patch);
Unlock_state();
return (void *)patch;
#endif
}
......@@ -2118,6 +2152,8 @@ void W_UnlockCachedPatch(void *patch)
if (!patch)
return;
Lock_state();
// The hardware code does its own memory management, as its patches
// have different lifetimes from software's.
#ifdef HWRENDER
......@@ -2126,6 +2162,8 @@ void W_UnlockCachedPatch(void *patch)
else
#endif
Z_Unlock(patch);
Unlock_state();
}
void *W_CachePatchName(const char *name, INT32 tag)
......
......@@ -207,11 +207,6 @@ void *W_CachePatchLongName(const char *name, INT32 tag);
void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag);
void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag);
// Returns a Software patch.
// Performs any necessary conversions from PNG images.
void *W_CacheSoftwarePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag);
void *W_CacheSoftwarePatchNum(lumpnum_t lumpnum, INT32 tag);
void W_UnlockCachedPatch(void *patch);
void W_VerifyFileMD5(UINT16 wadfilenum, const char *matchmd5);
......
......@@ -39,6 +39,15 @@
#include "hardware/hw_main.h" // For hardware memory info
#endif
/* File-local locking helpers. With HAVE_THREADS, Lock_state() and
 * Unlock_state() call I_lock_mutex / I_unlock_mutex on Z_mutex (the lock
 * call takes the mutex's address, the unlock call takes it by value).
 * Without HAVE_THREADS both macros expand to nothing.
 * NOTE(review): Z_CallocAlign takes this lock and then calls Z_MallocAlign,
 * which takes it again - this presumably relies on the mutex being
 * recursive; confirm against the I_lock_mutex implementation. */
#ifdef HAVE_THREADS
static I_mutex Z_mutex;
# define Lock_state() I_lock_mutex(&Z_mutex)
# define Unlock_state() I_unlock_mutex(Z_mutex)
#else
# define Lock_state()
# define Unlock_state()
#endif
#ifdef HAVE_VALGRIND
#include "valgrind.h"
static boolean Z_calloc = false;
......@@ -203,6 +212,8 @@ void Z_Free(void *ptr)
if (ptr == NULL)
return;
Lock_state();
#ifdef ZDEBUG2
CONS_Debug(DBG_MEMORY, "Z_Free %s:%d\n", file, line);
#endif
......@@ -237,6 +248,8 @@ void Z_Free(void *ptr)
block->prev->next = block->next;
block->next->prev = block->prev;
free(block);
Unlock_state();
}
/** malloc() that doesn't accept failure.
......@@ -295,6 +308,8 @@ void *Z_MallocAlign(size_t size, INT32 tag, void *user, INT32 alignbits)
void *given;
size_t blocksize = extrabytes + sizeof *hdr + size;
Lock_state();
#ifdef ZDEBUG2
CONS_Debug(DBG_MEMORY, "Z_Malloc %s:%d\n", file, line);
#endif
......@@ -359,6 +374,8 @@ void *Z_MallocAlign(size_t size, INT32 tag, void *user, INT32 alignbits)
I_Error("Z_Malloc: attempted to allocate purgable block "
"(size %s) with no user", sizeu1(size));
Unlock_state();
return given;
}
......@@ -381,14 +398,19 @@ void *Z_Calloc2(size_t size, INT32 tag, void *user, INT32 alignbits, const char
void *Z_CallocAlign(size_t size, INT32 tag, void *user, INT32 alignbits)
#endif
{
void *mem;
Lock_state();
#ifdef VALGRIND_MEMPOOL_ALLOC
Z_calloc = true;
#endif
#ifdef ZDEBUG
return memset(Z_Malloc2 (size, tag, user, alignbits, file, line), 0, size);
mem = Z_Malloc2 (size, tag, user, alignbits, file, line);
#else
return memset(Z_MallocAlign(size, tag, user, alignbits ), 0, size);
mem = Z_MallocAlign(size, tag, user, alignbits );
#endif
memset(mem, 0, size);
Unlock_state();
return mem;
}
/** The Z_ReallocAlign function.
......