diff --git a/SRB2.cbp b/SRB2.cbp index 2a1eb87b8565b3d2d9c13216c23d39dceecd6f92..9e887bf859c05ab821b3936f29cb3089daef2b2d 100644 --- a/SRB2.cbp +++ b/SRB2.cbp @@ -1992,24 +1992,6 @@ HW3SOUND for 3D hardware sound support <Option compilerVar="CC" /> </Unit> <Unit filename="src/v_video.h" /> - <Unit filename="src/vid_copy.s"> - <Option compilerVar="CC" /> - <Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" /> - <Option target="Debug Native/SDL" /> - <Option target="Release Native/SDL" /> - <Option target="Debug Linux/SDL" /> - <Option target="Release Linux/SDL" /> - <Option target="Debug Mingw/SDL" /> - <Option target="Release Mingw/SDL" /> - <Option target="Debug Mingw/DirectX" /> - <Option target="Release Mingw/DirectX" /> - </Unit> <Unit filename="src/w_wad.c"> <Option compilerVar="CC" /> </Unit> diff --git a/SRB2_common.props b/SRB2_common.props index 0f80ceb174874e682f0205de06733cb25e2b247a..6a0d53484f10106bc254851b1e1056dc7b24a86a 100644 --- a/SRB2_common.props +++ b/SRB2_common.props @@ -25,9 +25,6 @@ </Link> </ItemDefinitionGroup> <ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'"> - <ClCompile> - <PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions> - </ClCompile> <Link> <ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers> </Link> diff --git a/Srb2.dev b/Srb2.dev index 21683e7c3c5e055393d91778fba3405bfae240de..8bd36cf490cc84dae69d25399113d346392e4fc8 100644 --- a/Srb2.dev +++ b/Srb2.dev @@ -5,7 +5,7 @@ Ver=3 IsCpp=0 Type=0 UnitCount=279 -Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad +Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad CommandLine= CompilerSettings=00000000000100000111e1 PchHead=-1 @@ -1473,36 +1473,6 @@ Priority=1000 OverrideBuildCmd=0 BuildCmd= -[Unit149] -FileName=src\tmap.nas -Folder=A_Asm -Compile=0 -CompileCpp=0 -Link=0 -Priority=1000 -OverrideBuildCmd=1 -BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas - -[Unit150] -FileName=src\asm_defs.inc -Folder=A_Asm -Compile=0 -CompileCpp=0 -Link=0 -Priority=1000 -OverrideBuildCmd=0 -BuildCmd= - -[Unit151] -FileName=src\vid_copy.s -Folder=A_Asm -Compile=1 -CompileCpp=0 -Link=1 -Priority=1000 -OverrideBuildCmd=1 -BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@ - [Unit152] FileName=src\y_inter.h Folder=H_Hud @@ -1543,26 +1513,6 @@ Priority=1000 OverrideBuildCmd=0 BuildCmd= -[Unit156] -FileName=src\p5prof.h -Folder=A_Asm -Compile=1 -CompileCpp=0 -Link=1 -Priority=1000 -OverrideBuildCmd=0 -BuildCmd= - -[Unit157] -FileName=src\tmap_mmx.nas -Folder=A_Asm -Compile=0 -CompileCpp=0 -Link=0 -Priority=1000 -OverrideBuildCmd=1 -BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas - [Unit159] FileName=src\lzf.h Folder=W_Wad diff --git a/appveyor.yml b/appveyor.yml index e3348d35cb2b001fba3162792867548b278d63f5..9770cb37df07fd9be9ae685f3770ca7410915c9f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -7,8 +7,6 @@ environment: # c:\mingw-w64 i686 has gcc 6.3.0, so use c:\msys64 7.3.0 instead MINGW_SDK: c:\msys64\mingw32 CFLAGS: -Wno-implicit-fallthrough - NASM_ZIP: nasm-2.12.01 - NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip UPX_ZIP: upx391w UPX_URL: http://upx.sourceforge.net/download/upx391w.zip CCACHE_EXE: ccache.exe @@ -40,17 +38,12 @@ environment: ASSET_CLEAN: 0 cache: -- nasm-2.12.01.zip - upx391w.zip - ccache.exe - C:\Users\appveyor\.ccache - C:\Users\appveyor\srb2_cache install: -- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip" -- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null -- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0 - - if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip" - 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null - robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0 @@ -65,7 +58,6 @@ configuration: before_build: - set "Path=%MINGW_SDK%\bin;%Path%" - mingw32-make --version -- nasm -v - if not [%NOUPX%] == [1] ( upx -V ) - ccache -V - ccache -s diff --git a/cmake/Modules/CMakeASM_YASMInformation.cmake b/cmake/Modules/CMakeASM_YASMInformation.cmake deleted file mode 100644 index 1765180853bb2d23217a1eb785f97737411e68b1..0000000000000000000000000000000000000000 --- a/cmake/Modules/CMakeASM_YASMInformation.cmake +++ /dev/null @@ -1,46 +0,0 @@ - -#============================================================================= -# Copyright 2010 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# support for the yasm assembler - -set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm) - -if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT) - if(WIN32) - if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8) - set(CMAKE_ASM_YASM_OBJECT_FORMAT win64) - else() - set(CMAKE_ASM_YASM_OBJECT_FORMAT win32) - endif() - elseif(APPLE) - if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8) - set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64) - else() - set(CMAKE_ASM_YASM_OBJECT_FORMAT macho) - endif() - else() - if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8) - set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64) - else() - set(CMAKE_ASM_YASM_OBJECT_FORMAT elf) - endif() - endif() -endif() - -set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>") - -# Load the generic ASMInformation file: -set(ASM_DIALECT "_YASM") -include(CMakeASMInformation) -set(ASM_DIALECT) diff --git a/cmake/Modules/CMakeDetermineASM_YASMCompiler.cmake b/cmake/Modules/CMakeDetermineASM_YASMCompiler.cmake deleted file mode 100644 index a5e7c9e5801121f04411e5f1b1c6efa98736bcc1..0000000000000000000000000000000000000000 --- a/cmake/Modules/CMakeDetermineASM_YASMCompiler.cmake +++ /dev/null @@ -1,27 +0,0 @@ - -#============================================================================= -# Copyright 2010 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# Find the nasm assembler. yasm (http://www.tortall.net/projects/yasm/) is nasm compatible - -set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm) - -if(NOT CMAKE_ASM_YASM_COMPILER) - find_program(CMAKE_ASM_YASM_COMPILER yasm - "$ENV{ProgramFiles}/YASM") -endif() - -# Load the generic DetermineASM compiler file with the DIALECT set properly: -set(ASM_DIALECT "_YASM") -include(CMakeDetermineASMCompiler) -set(ASM_DIALECT) diff --git a/cmake/Modules/CMakeTestASM_YASMCompiler.cmake b/cmake/Modules/CMakeTestASM_YASMCompiler.cmake deleted file mode 100644 index 745f7125c4a2f7a003c488b89d977b75a8eb3ebc..0000000000000000000000000000000000000000 --- a/cmake/Modules/CMakeTestASM_YASMCompiler.cmake +++ /dev/null @@ -1,23 +0,0 @@ - -#============================================================================= -# Copyright 2010 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# This file is used by EnableLanguage in cmGlobalGenerator to -# determine that the selected ASM_NASM "compiler" works. -# For assembler this can only check whether the compiler has been found, -# because otherwise there would have to be a separate assembler source file -# for each assembler on every architecture. - -set(ASM_DIALECT "_YASM") -include(CMakeTestASMCompiler) -set(ASM_DIALECT) diff --git a/src/Android.mk b/src/Android.mk index a461da2242c7ab813831c95c1d442353756b0907..035d48887727c2a6d6b63a6acb38fc7ec65a9342 100644 --- a/src/Android.mk +++ b/src/Android.mk @@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \ android/i_system.c \ android/i_video.c -LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS +LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS LOCAL_MODULE := libsrb2 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b62237374295ce93feeecb64a40220d4a6ce1561..8cd0310137481882a4594a2d009038a3be50bf19 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,15 +13,7 @@ target_compile_features(SRB2SDL2 PRIVATE c_std_11 cxx_std_17) target_sourcefile(c) target_sources(SRB2SDL2 PRIVATE comptime.c md5.c config.h.in) -set(SRB2_ASM_SOURCES vid_copy.s) - -set(SRB2_NASM_SOURCES tmap_mmx.nas tmap.nas) - ### Configuration -set(SRB2_CONFIG_USEASM OFF CACHE BOOL - "Enable NASM tmap implementation for software mode speedup.") -set(SRB2_CONFIG_YASM OFF CACHE BOOL - "Use YASM in place of NASM.") set(SRB2_CONFIG_DEV_BUILD OFF CACHE BOOL "Compile a development build of SRB2.") @@ -78,33 +70,6 @@ if("${SRB2_CONFIG_HWRENDER}") endif() endif() -if(${SRB2_CONFIG_USEASM}) - #SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm. - if("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") - set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}") - endif() - - if(${SRB2_CONFIG_YASM}) - set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas) - set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.") - enable_language(ASM_YASM) - else() - set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas) - set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.") - enable_language(ASM_NASM) - endif() - - set(SRB2_USEASM ON) - target_compile_definitions(SRB2SDL2 PRIVATE -DUSEASM) - target_compile_options(SRB2SDL2 PRIVATE -msse3 -mfpmath=sse) - - target_sources(SRB2SDL2 PRIVATE ${SRB2_ASM_SOURCES} - ${SRB2_NASM_SOURCES}) -else() - set(SRB2_USEASM OFF) - target_compile_definitions(SRB2SDL2 PRIVATE -DNONX86 -DNORUSEASM) -endif() - # Targets # If using CCACHE, then force it. diff --git a/src/Makefile b/src/Makefile index 36b1a7efabea7416f42c624fc03ab93d2a15c05f..92f4c0c66c735e9930ac85cc8f26ee87c70658b6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -47,8 +47,6 @@ # HAVE_MINIUPNPC=1 - Enable automated port forwarding. # Already enabled by default for 32-bit # Windows. -# NOASM=1 - Disable hand optimized assembly code for the -# Software renderer. # NOPNG=1 - Disable PNG graphics support. (TODO: double # check netplay compatible.) # NOCURL=1 - Disable libcurl--HTTP capability. @@ -88,7 +86,6 @@ # executable. # WINDOWSHELL=1 - Use Windows commands. # PREFIX= - Prefix to many commands, for cross compiling. -# YASM=1 - Use Yasm instead of NASM assembler. # STABS=1 - ? # ECHO=1 - Print out each command in the build process. # NOECHOFILENAMES=1 - Don't print out each that is being @@ -148,22 +145,6 @@ OBJCOPY:=$(call Prefix,objcopy) OBJDUMP:=$(call Prefix,objdump) WINDRES:=$(call Prefix,windres) -ifdef YASM -NASM?=yasm -else -NASM?=nasm -endif - -ifdef YASM -ifdef STABS -NASMOPTS?=-g stabs -else -NASMOPTS?=-g dwarf2 -endif -else -NASMOPTS?=-g -endif - GZIP?=gzip GZIP_OPTS?=-9 -f -n ifdef WINDOWSHELL @@ -187,8 +168,6 @@ makedir:=../make opts:=-DCOMPVERSION -g libs:= -nasm_format:= - # This is a list of variables names, of which if defined, # also defines the name as a macro to the compiler. passthru_opts:= @@ -316,7 +295,6 @@ endif LD:=$(CC) cc:=$(cc) $(opts) -nasm=$(NASM) $(NASMOPTS) -f $(nasm_format) ifdef UPX upx=$(UPX) $(UPX_OPTS) endif @@ -393,7 +371,6 @@ $(objdir)/%.$(1) : %.$(2) | $$$$(@D)/ endef $(eval $(call _recipe,o,c,$(cc) -c -o $$@ $$<)) -$(eval $(call _recipe,o,nas,$(nasm) -o $$@ $$<)) $(eval $(call _recipe,o,s,$(cc) $(asflags) -c -o $$@ $$<)) $(eval $(call _recipe,res,rc,$(windres) -i $$< -o $$@)) diff --git a/src/Makefile.d/features.mk b/src/Makefile.d/features.mk index 8ba33383bb2f0c8169e92f91c6c8fea25c6c852f..1787f94cb8988e6b27d5bb334c5a978ef2b31947 100644 --- a/src/Makefile.d/features.mk +++ b/src/Makefile.d/features.mk @@ -18,13 +18,6 @@ opts+=-DHWRENDER sources+=$(call List,hardware/Sourcefile) endif -ifndef NOASM -ifndef NONX86 -sources+=tmap.nas tmap_mmx.nas -opts+=-DUSEASM -endif -endif - ifndef NOMD5 sources+=md5.c endif diff --git a/src/Makefile.d/nix.mk b/src/Makefile.d/nix.mk index 767b64c12be4bf42fede8e07e80cba68151ef92a..aa2e96df7eb402cb64d685a7af95a44a69d71355 100644 --- a/src/Makefile.d/nix.mk +++ b/src/Makefile.d/nix.mk @@ -9,10 +9,6 @@ opts+=-DUNIXCOMMON -DLUA_USE_POSIX # instead of addresses libs+=-lm -rdynamic -ifndef nasm_format -nasm_format:=elf -DLINUX -endif - ifndef NOHW opts+=-I/usr/X11R6/include libs+=-L/usr/X11R6/lib @@ -35,7 +31,6 @@ endif # FIXME: UNTESTED #ifdef SOLARIS #NOIPX=1 -#NOASM=1 #opts+=-I/usr/local/include -I/opt/sfw/include \ # -DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP #libs+=-L/opt/sfw/lib -lsocket -lnsl diff --git a/src/Makefile.d/platform.mk b/src/Makefile.d/platform.mk index c5ac71a20adc24766a961cb544af54cf3a1b59b1..d19143e4cf6040dc161b201553db3942b123ee39 100644 --- a/src/Makefile.d/platform.mk +++ b/src/Makefile.d/platform.mk @@ -39,7 +39,6 @@ else ifdef SOLARIS # FIXME: UNTESTED UNIX=1 platform=solaris else ifdef CYGWIN32 # FIXME: UNTESTED -nasm_format=win32 platform=cygwin else ifdef MINGW ifdef MINGW64 diff --git a/src/Makefile.d/sdl.mk b/src/Makefile.d/sdl.mk index 99ca624e69f2f18c10625c93585f14681636f36e..a1bfa33038bbacebada85790a7565fceb9440985 100644 --- a/src/Makefile.d/sdl.mk +++ b/src/Makefile.d/sdl.mk @@ -56,13 +56,6 @@ SDL_LDFLAGS?=$(shell $(SDL_CONFIG) \ $(eval $(call Propogate_flags,SDL)) endif -# use the x86 asm code -ifndef CYGWIN32 -ifndef NOASM -USEASM=1 -endif -endif - ifdef MINGW ifndef NOSDLMAIN SDLMAIN=1 diff --git a/src/Makefile.d/win32.mk b/src/Makefile.d/win32.mk index 0e48ed68359523e70b4ba0a6d8eade4b81d1d9ca..73a3d9e453ecaa0a01e32e15f71326bd7be57920 100644 --- a/src/Makefile.d/win32.mk +++ b/src/Makefile.d/win32.mk @@ -17,8 +17,6 @@ sources+=win32/Srb2win.rc opts+=-DSTDC_HEADERS libs+=-ladvapi32 -lkernel32 -lmsvcrt -luser32 -nasm_format:=win32 - SDL?=1 ifndef NOHW diff --git a/src/Sourcefile b/src/Sourcefile index 7c530500052e7379fa503ef888445dde79d0d350..f2b408c665d28306ce9c778646f6139b6874099a 100644 --- a/src/Sourcefile +++ b/src/Sourcefile @@ -81,7 +81,6 @@ mserv.c http-mserv.c i_tcp.c lzf.c -vid_copy.s b_bot.c u_list.c lua_script.c diff --git a/src/asm_defs.inc b/src/asm_defs.inc deleted file mode 100644 index 48f8da0d8f582f28ad09674eec97b4af840f40b9..0000000000000000000000000000000000000000 --- a/src/asm_defs.inc +++ /dev/null @@ -1,43 +0,0 @@ -// SONIC ROBO BLAST 2 -//----------------------------------------------------------------------------- -// Copyright (C) 1998-2000 by DooM Legacy Team. -// Copyright (C) 1999-2023 by Sonic Team Junior. -// -// This program is free software distributed under the -// terms of the GNU General Public License, version 2. -// See the 'LICENSE' file for more details. -//----------------------------------------------------------------------------- -/// \file asm_defs.inc -/// \brief must match the C structures - -#ifndef __ASM_DEFS__ -#define __ASM_DEFS__ - -// this makes variables more noticable, -// and make the label match with C code - -// Linux, unlike DOS, has no "_" 19990119 by Kin -// and nasm needs .data code segs under linux 20010210 by metzgermeister -// FIXME: nasm ignores these settings, so I put the macros into the makefile -#ifdef __ELF__ -#define C(label) label -#define CODE_SEG .data -#else -#define C(label) _##label -#define CODE_SEG .text -#endif - -/* This is a more readable way to access the arguments passed from C code */ -/* PLEASE NOTE: it is supposed that all arguments passed from C code are */ -/* 32bit integer (INT32, long, and most *pointers) */ -#define ARG1 8(%ebp) -#define ARG2 12(%ebp) -#define ARG3 16(%ebp) -#define ARG4 20(%ebp) -#define ARG5 24(%ebp) -#define ARG6 28(%ebp) -#define ARG7 32(%ebp) -#define ARG8 36(%ebp) -#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves. - -#endif diff --git a/src/d_netcmd.c b/src/d_netcmd.c index 45a394eff5a4f9d00de6c125450dafeae473c932..1b987bcf130496249a0bd670765ea981d52d8ed0 100644 --- a/src/d_netcmd.c +++ b/src/d_netcmd.c @@ -3893,11 +3893,6 @@ static void Command_Version_f(void) else // 16-bit? 128-bit? CONS_Printf("Bits Unknown "); - // No ASM? -#ifdef NOASM - CONS_Printf("\x85" "NOASM " "\x80"); -#endif - // Debug build #ifdef _DEBUG CONS_Printf("\x85" "DEBUG " "\x80"); diff --git a/src/p5prof.h b/src/p5prof.h deleted file mode 100644 index a9ed3965e9691f9931cd360da7cc41efc2ee5c55..0000000000000000000000000000000000000000 --- a/src/p5prof.h +++ /dev/null @@ -1,278 +0,0 @@ -/********************************************************* - * - * File: p5prof.h - * By: Kevin Baca - * - * MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG - * (WHICH MEANS WRITE THE LOW DWORD FIRST) - * - * Now in yer code do: - * INT64 count,total; - * - * ... - * RDMSR(0x10,&count); //inner loop count - * total += count; - * ... - * - * printf("0x%x %x", (INT32)total, *((INT32 *)&total+1)); - * // HIGH LOW - * - *********************************************************/ -/**\file - \brief This file provides macros to profile your code. - - Here's how they work... - - As you may or may not know, the Pentium class of - processors provides extremely fine grained profiling - capabilities through the use of what are called - Machine Specific Registers (MSRs). These registers - can provide information about almost any aspect of - CPU performance down to a single cycle. - - The MSRs of interest for profiling are specified by - indices 0x10, 0x11, 0x12, and 0x13. Here is a brief - description of each of these registers: - - MSR 0x10 - This register is simple a cycle counter. - - MSR 0x11 - This register controls what type of profiling data - will be gathered. - - MSRs 0x12 and 0x13 - These registers gather the profiling data specified in - MSR 0x11. - - Each MSR is 64 bits wide. For the Pentium processor, - only the lower 32 bits of MSR 0x11 are valid. Bits 0-15 - specify what data will be gathered in MSR 0x12. Bits 16-31 - specify what data will be gathered in MSR 0x13. Both sets - of bits have the same format: - - Bits 0-5 specify which hardware event will be tracked. - Bit 6, if set, indicates events will be tracked in - rings 0-2. - Bit 7, if set, indicates events will be tracked in - ring 3. - Bit 8, if set, indicates cycles should be counted for - the specified event. If clear, it indicates the - number of events should be counted. - - Two instructions are provided for manupulating the MSRs. - RDMSR (Read Machine Specific Register) and WRMSR - (Write Machine Specific Register). These opcodes were - originally undocumented and therefore most assemblers don't - recognize them. Their byte codes are provided in the - macros below. - - RDMSR takes the MSR index in ecx and the profiling criteria - in edx : eax. - - WRMSR takes the MSR index in ecx and returns the profile data - in edx : eax. - - Two profiling registers limits profiling capability to - gathering only two types of information. The register - usage can, however, be combined in interesting ways. - For example, you can set one register to gather the - number of a specific type of event while the other gathers - the number of cycles for the same event. Or you can - gather the number of two separate events while using - MSR 0x10 to gather the number of cycles. - - The enumerated list provides somewhat readable labels for - the types of events that can be tracked. - - For more information, get ahold of appendix H from the - Intel Pentium programmer's manual (I don't remember the - order number) or go to - http://green.kaist.ac.kr/jwhahn/art3.htm. - That's an article by Terje Mathisen where I got most of - my information. - - You may use this code however you wish. I hope it's - useful and I hope I got everything right. - - -Kevin - - kbaca@skygames.com - -*/ - -#ifdef __GNUC__ - -#define RDTSC(_dst) \ -__asm__(" - .byte 0x0F,0x31 - movl %%edx,(%%edi) - movl %%eax,4(%%edi)"\ -: : "D" (_dst) : "eax", "edx", "edi") - -// the old code... swapped it -// movl %%edx,(%%edi) -// movl %%eax,4(%%edi)" -#define RDMSR(_msri, _msrd) \ -__asm__(" - .byte 0x0F,0x32 - movl %%eax,(%%edi) - movl %%edx,4(%%edi)"\ -: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi") - -#define WRMSR(_msri, _msrd) \ -__asm__(" - xorl %%edx,%%edx - .byte 0x0F,0x30"\ -: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx") - -#define RDMSR_0x12_0x13(_msr12, _msr13) \ -__asm__(" - movl $0x12,%%ecx - .byte 0x0F,0x32 - movl %%edx,(%%edi) - movl %%eax,4(%%edi) - movl $0x13,%%ecx - .byte 0x0F,0x32 - movl %%edx,(%%esi) - movl %%eax,4(%%esi)"\ -: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi") - -#define ZERO_MSR_0x12_0x13() \ -__asm__(" - xorl %%edx,%%edx - xorl %%eax,%%eax - movl $0x12,%%ecx - .byte 0x0F,0x30 - movl $0x13,%%ecx - .byte 0x0F,0x30"\ -: : : "eax", "ecx", "edx") - -#elif defined (__WATCOMC__) - -extern void RDTSC(UINT32 *dst); -#pragma aux RDTSC =\ - "db 0x0F,0x31"\ - "mov [edi],edx"\ - "mov [4+edi],eax"\ - parm [edi]\ - modify [eax edx edi]; - -extern void RDMSR(UINT32 msri, UINT32 *msrd); -#pragma aux RDMSR =\ - "db 0x0F,0x32"\ - "mov [edi],edx"\ - "mov [4+edi],eax"\ - parm [ecx] [edi]\ - modify [eax ecx edx edi]; - -extern void WRMSR(UINT32 msri, UINT32 msrd); -#pragma aux WRMSR =\ - "xor edx,edx"\ - "db 0x0F,0x30"\ - parm [ecx] [eax]\ - modify [eax ecx edx]; - -extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13); -#pragma aux RDMSR_0x12_0x13 =\ - "mov ecx,0x12"\ - "db 0x0F,0x32"\ - "mov [edi],edx"\ - "mov [4+edi],eax"\ - "mov ecx,0x13"\ - "db 0x0F,0x32"\ - "mov [esi],edx"\ - "mov [4+esi],eax"\ - parm [edi] [esi]\ - modify [eax ecx edx edi esi]; - -extern void ZERO_MSR_0x12_0x13(void); -#pragma aux ZERO_MSR_0x12_0x13 =\ - "xor edx,edx"\ - "xor eax,eax"\ - "mov ecx,0x12"\ - "db 0x0F,0x30"\ - "mov ecx,0x13"\ - "db 0x0F,0x30"\ - modify [eax ecx edx]; - -#endif - -typedef enum -{ - DataRead, - DataWrite, - DataTLBMiss, - DataReadMiss, - DataWriteMiss, - WriteHitEM, - DataCacheLinesWritten, - DataCacheSnoops, - DataCacheSnoopHit, - MemAccessBothPipes, - BankConflict, - MisalignedDataRef, - CodeRead, - CodeTLBMiss, - CodeCacheMiss, - SegRegLoad, - RESERVED0, - RESERVED1, - Branch, - BTBHit, - TakenBranchOrBTBHit, - PipelineFlush, - InstructionsExeced, - InstructionsExecedVPipe, - BusUtilizationClocks, - PipelineStalledWriteBackup, - PipelineStalledDateMemRead, - PipeLineStalledWriteEM, - LockedBusCycle, - IOReadOrWriteCycle, - NonCacheableMemRef, - AGI, - RESERVED2, - RESERVED3, - FPOperation, - Breakpoint0Match, - Breakpoint1Match, - Breakpoint2Match, - Breakpoint3Match, - HWInterrupt, - DataReadOrWrite, - DataReadOrWriteMiss -}; - -#define PROF_CYCLES (0x100) -#define PROF_EVENTS (0x000) -#define RING_012 (0x40) -#define RING_3 (0x80) -#define RING_0123 (RING_012 | RING_3) - -/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/ -#define ProfSetProfiles(_msr12, _msr13)\ -{\ - UINT32 prof;\ -\ - prof = (_msr12) | ((_msr13) << 16);\ - WRMSR(0x11, prof);\ -} - -/*void ProfBeginProfiles(void);*/ -#define ProfBeginProfiles()\ - ZERO_MSR_0x12_0x13(); - -/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/ -#define ProfGetProfiles(_msr12, _msr13)\ - RDMSR_0x12_0x13(_msr12, _msr13); - -/*void ProfZeroTimer(void);*/ -#define ProfZeroTimer()\ - WRMSR(0x10, 0); - -/*void ProfReadTimer(UINT32 timer[2]);*/ -#define ProfReadTimer(timer)\ - RDMSR(0x10, timer); - -/*EOF*/ diff --git a/src/r_draw.c b/src/r_draw.c index b0467e4f728d4cf757b53484a3d5ca4fda9d91cc..df9e1a4608b568706452df29bbc347adef075b01 100644 --- a/src/r_draw.c +++ b/src/r_draw.c @@ -179,8 +179,6 @@ CV_PossibleValue_t Color_cons_t[MAXSKINCOLORS+1]; void R_InitTranslucencyTables(void) { // Load here the transparency lookup tables 'TRANSx0' - // NOTE: the TRANSx0 resources MUST BE aligned on 64k for the asm - // optimised code (in other words, transtables pointer low word is 0) transtables = Z_MallocAlign(NUMTRANSTABLES*0x10000, PU_STATIC, NULL, 16); diff --git a/src/r_draw.h b/src/r_draw.h index ea03a8e3d53e059570822a0119ee6431f45d105a..0103ed82782b22c7a51beb10c20473a3e8ba3787 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -225,18 +225,6 @@ void R_DrawTiltedTransSolidColorSpan_8(void); void R_DrawWaterSolidColorSpan_8(void); void R_DrawTiltedWaterSolidColorSpan_8(void); -#ifdef USEASM -void ASMCALL R_DrawColumn_8_ASM(void); -void ASMCALL R_DrawShadeColumn_8_ASM(void); -void ASMCALL R_DrawTranslucentColumn_8_ASM(void); -void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void); - -void ASMCALL R_DrawColumn_8_MMX(void); - -void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void); -void ASMCALL R_DrawSpan_8_MMX(void); -#endif - // ------------------ // 16bpp DRAWING CODE // ------------------ diff --git a/src/r_splats.c b/src/r_splats.c index d182d628ba8fc09ab3d8e898a339f29b3b2d92cb..737b6d110a0a11e04137447a9b6fe901b76fef93 100644 --- a/src/r_splats.c +++ b/src/r_splats.c @@ -31,20 +31,8 @@ static void prepare_rastertab(void); static void R_RasterizeFloorSplat(floorsplat_t *pSplat, vector2_t *verts, vissprite_t *vis); -#ifdef USEASM -void ASMCALL rasterize_segment_tex_asm(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir); -#endif - static void rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir) { -#ifdef USEASM - if (R_ASM) - { - rasterize_segment_tex_asm(x1, y1, x2, y2, tv1, tv2, tc, dir); - return; - } - else -#endif { fixed_t xs, xe, count; fixed_t dx0, dx1; diff --git a/src/screen.c b/src/screen.c index fe5b399958e7082bd872478a53a4ef2b3da37df1..417e793bde540c62a9edf2b6a0b8073250ee7f73 100644 --- a/src/screen.c +++ b/src/screen.c @@ -44,10 +44,6 @@ // SRB2Kart #include "r_fps.h" // R_GetFramerateCap -#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200)) -#define RUSEASM //MSC.NET can't patch itself -#endif - // -------------------------------------------- // assembly or c drawer routines for 8bpp/16bpp // -------------------------------------------- @@ -102,7 +98,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_ // Short and Tall sky drawer, for the current color mode void (*walldrawerfunc)(void); -boolean R_ASM = true; boolean R_486 = false; boolean R_586 = false; boolean R_MMX = false; @@ -169,26 +164,6 @@ void SCR_SetDrawFuncs(void) spanfuncs_npo2[SPANDRAWFUNC_WATER] = R_DrawWaterSpan_NPO2_8; spanfuncs_npo2[SPANDRAWFUNC_TILTEDWATER] = R_DrawTiltedWaterSpan_NPO2_8; -#ifdef RUSEASM - if (R_ASM) - { - if (R_MMX) - { - colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_MMX; - //colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM; - //colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_MMX; - spanfuncs[BASEDRAWFUNC] = R_DrawSpan_8_MMX; - } - else - { - colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_ASM; - //colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM; - //colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM; - colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_ASM; - } - } -#endif } /* else if (vid.bpp > 1) { @@ -271,8 +246,6 @@ void SCR_Startup(void) CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2); } - if (M_CheckParm("-noASM")) - R_ASM = false; if (M_CheckParm("-486")) R_486 = true; if (M_CheckParm("-586")) diff --git a/src/sdl/CMakeLists.txt b/src/sdl/CMakeLists.txt index be540b778b733976a9ceb08c636bcd30d36d29d2..aab83ca2841efe125ca1776d8ceb62bd7282810f 100644 --- a/src/sdl/CMakeLists.txt +++ b/src/sdl/CMakeLists.txt @@ -8,11 +8,6 @@ target_sources(SRB2SDL2 PRIVATE ogl_sdl.c) target_sources(SRB2SDL2 PRIVATE i_threads.c) -if(${SRB2_USEASM}) - set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES LANGUAGE C) - set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") -endif() - if("${CMAKE_SYSTEM_NAME}" MATCHES Windows) target_sources(SRB2SDL2 PRIVATE ../win32/win_dbg.c @@ -68,18 +63,6 @@ if("${CMAKE_SYSTEM_NAME}" MATCHES Linux) target_link_libraries(SRB2SDL2 PRIVATE m rt) endif() -if(${SRB2_USEASM}) - if(${SRB2_CONFIG_YASM}) - set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_YASM_COMPILER}) - set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_YASM_OBJECT_FORMAT}) - set_source_files_properties(${SRB2_NASM_SOURCES} LANGUAGE ASM_YASM) - else() - set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_NASM_COMPILER}) - set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_NASM_OBJECT_FORMAT}) - set_source_files_properties(${SRB2_NASM_SOURCES} LANGUAGE ASM_NASM) - endif() -endif() - if("${CMAKE_SYSTEM_NAME}" MATCHES Windows) target_link_libraries(SRB2SDL2 PRIVATE ws2_32 diff --git a/src/sdl/MakeCYG.cfg b/src/sdl/MakeCYG.cfg index 5907579c1bc9d16abb0e338cb709566fd75b6c61..b78316b00142dfcb96188f6fb45baa047a628ed0 100644 --- a/src/sdl/MakeCYG.cfg +++ b/src/sdl/MakeCYG.cfg @@ -7,7 +7,6 @@ NOHW=1 NOHS=1 - NOASM=1 OPTS+=-DLINUX diff --git a/src/sdl/i_main.c b/src/sdl/i_main.c index 1dee379c0d8d95db186f1e9e3bb301554ed0549f..3eeacd83569188bd3bfdb5c8f2b9e720b5dce9f4 100644 --- a/src/sdl/i_main.c +++ b/src/sdl/i_main.c @@ -70,39 +70,6 @@ char logfilename[1024]; typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID); #endif -#if defined (_WIN32) -static inline VOID MakeCodeWritable(VOID) -{ -#ifdef USEASM // Disable write-protection of code segment - DWORD OldRights; - const DWORD NewRights = PAGE_EXECUTE_READWRITE; - PBYTE pBaseOfImage = (PBYTE)GetModuleHandle(NULL); - PIMAGE_DOS_HEADER dosH =(PIMAGE_DOS_HEADER)pBaseOfImage; - PIMAGE_NT_HEADERS ntH = (PIMAGE_NT_HEADERS)(pBaseOfImage + dosH->e_lfanew); - PIMAGE_OPTIONAL_HEADER oH = (PIMAGE_OPTIONAL_HEADER) - ((PBYTE)ntH + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER)); - LPVOID pA = pBaseOfImage+oH->BaseOfCode; - SIZE_T pS = oH->SizeOfCode; -#if 1 // try to find the text section - PIMAGE_SECTION_HEADER ntS = IMAGE_FIRST_SECTION (ntH); - WORD s; - for (s = 0; s < ntH->FileHeader.NumberOfSections; s++) - { - if (memcmp (ntS[s].Name, ".text\0\0", 8) == 0) - { - pA = pBaseOfImage+ntS[s].VirtualAddress; - pS = ntS[s].Misc.VirtualSize; - break; - } - } -#endif - - if (!VirtualProtect(pA,pS,NewRights,&OldRights)) - I_Error("Could not make code writable\n"); -#endif -} -#endif - #ifdef LOGMESSAGES static void InitLogging(void) { @@ -243,7 +210,6 @@ int main(int argc, char **argv) #ifndef __MINGW32__ prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo); #endif - MakeCodeWritable(); #endif // startup SRB2 diff --git a/src/tmap.nas b/src/tmap.nas deleted file mode 100644 index 85091cbd5d8dd9b1ab33cdd4938325eefeb1922b..0000000000000000000000000000000000000000 --- a/src/tmap.nas +++ /dev/null @@ -1,957 +0,0 @@ -;; SONIC ROBO BLAST 2 -;;----------------------------------------------------------------------------- -;; Copyright (C) 1998-2000 by DooM Legacy Team. -;; Copyright (C) 1999-2023 by Sonic Team Junior. -;; -;; This program is free software distributed under the -;; terms of the GNU General Public License, version 2. -;; See the 'LICENSE' file for more details. -;;----------------------------------------------------------------------------- -;; FILE: -;; tmap.nas -;; DESCRIPTION: -;; Assembler optimised rendering code for software mode. -;; Draw wall columns. - - -[BITS 32] - -%define FRACBITS 16 -%define TRANSPARENTPIXEL 255 - -%ifdef LINUX -%macro cextern 1 -[extern %1] -%endmacro - -%macro cglobal 1 -[global %1] -%endmacro - -%else -%macro cextern 1 -%define %1 _%1 -[extern %1] -%endmacro - -%macro cglobal 1 -%define %1 _%1 -[global %1] -%endmacro - -%endif - - -; The viddef_s structure. We only need the width field. -struc viddef_s - resb 12 -.width: resb 4 - resb 44 -endstruc - -;; externs -;; columns -cextern dc_x -cextern dc_yl -cextern dc_yh -cextern ylookup -cextern columnofs -cextern dc_source -cextern dc_texturemid -cextern dc_texheight -cextern dc_iscale -cextern dc_hires -cextern centery -cextern centeryfrac -cextern dc_colormap -cextern dc_transmap -cextern colormaps -cextern vid -cextern topleft - -; DELME -cextern R_DrawColumn_8 - -; polygon edge rasterizer -cextern prastertab - -[SECTION .data] - -;;.align 4 -loopcount dd 0 -pixelcount dd 0 -tystep dd 0 - -[SECTION .text] - -;;---------------------------------------------------------------------- -;; -;; R_DrawColumn : 8bpp column drawer -;; -;; New optimised version 10-01-1998 by D.Fabrice and P.Boris -;; Revised by G. Dick July 2010 to support the intervening twelve years' -;; worth of changes to the renderer. Since I only vaguely know what I'm -;; doing, this is probably rather suboptimal. Help appreciated! -;; -;;---------------------------------------------------------------------- -;; fracstep, vid.width in memory -;; eax = accumulator -;; ebx = colormap -;; ecx = count -;; edx = heightmask -;; esi = source -;; edi = dest -;; ebp = frac -;;---------------------------------------------------------------------- - -cglobal R_DrawColumn_8_ASM -; align 16 -R_DrawColumn_8_ASM: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov ebp,[dc_yl] - mov edi,[ylookup+ebp*4] - mov ebx,[dc_x] - add edi,[columnofs+ebx*4] ;; edi = dest -;; -;; pixelcount = yh - yl + 1 -;; - mov ecx,[dc_yh] - add ecx,1 - sub ecx,ebp ;; pixel count - jle near .done ;; nothing to scale -;; -;; fracstep = dc_iscale; // But we just use [dc_iscale] -;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep)); -;; - mov eax,ebp ;; dc_yl - shl eax,FRACBITS - sub eax,[centeryfrac] - imul dword [dc_iscale] - shrd eax,edx,FRACBITS - add eax,[dc_texturemid] - mov ebp,eax ;; ebp = frac - - mov ebx,[dc_colormap] - - mov esi,[dc_source] -;; -;; if (dc_hires) frac = 0; -;; - test byte [dc_hires],0x01 - jz .texheightcheck - xor ebp,ebp - -;; -;; Check for power of two -;; -.texheightcheck: - mov edx,[dc_texheight] - sub edx,1 ;; edx = heightmask - test edx,[dc_texheight] - jnz .notpowertwo - - test ecx,0x01 ;; Test for odd no. pixels - jnz .odd - -;; -;; Texture height is a power of two, so we get modular arithmetic by -;; masking -;; -.powertwo: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part - and eax,edx ;; eax &= heightmask - movzx eax,byte [esi + eax] ;; eax = texel - add ebp,[dc_iscale] ;; frac += fracstep - movzx eax,byte [ebx+eax] ;; Map through colormap - mov [edi],al ;; Write pixel - ;; dest += vid.width - add edi,[vid + viddef_s.width] - -.odd: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part - and eax,edx ;; eax &= heightmask - movzx eax,byte [esi + eax] ;; eax = texel - add ebp,[dc_iscale] ;; frac += fracstep - movzx eax,byte [ebx+eax] ;; Map through colormap - mov [edi],al ;; Write pixel - ;; dest += vid.width - add edi,[vid + viddef_s.width] - - - sub ecx,2 ;; count -= 2 - jg .powertwo - - jmp .done - -.notpowertwo: - add edx,1 - shl edx,FRACBITS - test ebp,ebp - jns .notpowtwoloop - -.makefracpos: - add ebp,edx ;; frac is negative; make it positive - js .makefracpos - -.notpowtwoloop: - cmp ebp,edx ;; Reduce mod height - jl .writenonpowtwo - sub ebp,edx - jmp .notpowtwoloop - -.writenonpowtwo: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part. - mov bl,[esi + eax] ;; ebx = colormap + texel - add ebp,[dc_iscale] ;; frac += fracstep - movzx eax,byte [ebx] ;; Map through colormap - mov [edi],al ;; Write pixel - ;; dest += vid.width - add edi,[vid + viddef_s.width] - - sub ecx,1 - jnz .notpowtwoloop - -;; - -.done: - pop ebx ;; restore register variables - pop edi - pop esi - pop ebp ;; restore caller's stack frame pointer - ret - - -;;---------------------------------------------------------------------- -;; -;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent -;; pixels. -;; -;; New optimised version 10-01-1998 by D.Fabrice and P.Boris -;; Revised by G. Dick July 2010 to support the intervening twelve years' -;; worth of changes to the renderer. Since I only vaguely know what I'm -;; doing, this is probably rather suboptimal. Help appreciated! -;; -;;---------------------------------------------------------------------- -;; fracstep, vid.width in memory -;; eax = accumulator -;; ebx = colormap -;; ecx = count -;; edx = heightmask -;; esi = source -;; edi = dest -;; ebp = frac -;;---------------------------------------------------------------------- - -cglobal R_Draw2sMultiPatchColumn_8_ASM -; align 16 -R_Draw2sMultiPatchColumn_8_ASM: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov ebp,[dc_yl] - mov edi,[ylookup+ebp*4] - mov ebx,[dc_x] - add edi,[columnofs+ebx*4] ;; edi = dest -;; -;; pixelcount = yh - yl + 1 -;; - mov ecx,[dc_yh] - add ecx,1 - sub ecx,ebp ;; pixel count - jle near .done ;; nothing to scale -;; -;; fracstep = dc_iscale; // But we just use [dc_iscale] -;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep)); -;; - mov eax,ebp ;; dc_yl - shl eax,FRACBITS - sub eax,[centeryfrac] - imul dword [dc_iscale] - shrd eax,edx,FRACBITS - add eax,[dc_texturemid] - mov ebp,eax ;; ebp = frac - - mov ebx,[dc_colormap] - - mov esi,[dc_source] -;; -;; if (dc_hires) frac = 0; -;; - test byte [dc_hires],0x01 - jz .texheightcheck - xor ebp,ebp - -;; -;; Check for power of two -;; -.texheightcheck: - mov edx,[dc_texheight] - sub edx,1 ;; edx = heightmask - test edx,[dc_texheight] - jnz .notpowertwo - - test ecx,0x01 ;; Test for odd no. pixels - jnz .odd - -;; -;; Texture height is a power of two, so we get modular arithmetic by -;; masking -;; -.powertwo: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part - and eax,edx ;; eax &= heightmask - movzx eax,byte [esi + eax] ;; eax = texel - add ebp,[dc_iscale] ;; frac += fracstep - cmp al,TRANSPARENTPIXEL ;; Is pixel transparent? - je .nextpowtwoeven ;; If so, advance. - movzx eax,byte [ebx+eax] ;; Map through colormap - mov [edi],al ;; Write pixel -.nextpowtwoeven: - ;; dest += vid.width - add edi,[vid + viddef_s.width] - -.odd: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part - and eax,edx ;; eax &= heightmask - movzx eax,byte [esi + eax] ;; eax = texel - add ebp,[dc_iscale] ;; frac += fracstep - cmp al,TRANSPARENTPIXEL ;; Is pixel transparent? - je .nextpowtwoodd ;; If so, advance. - movzx eax,byte [ebx+eax] ;; Map through colormap - mov [edi],al ;; Write pixel -.nextpowtwoodd: - ;; dest += vid.width - add edi,[vid + viddef_s.width] - - - sub ecx,2 ;; count -= 2 - jg .powertwo - - jmp .done - -.notpowertwo: - add edx,1 - shl edx,FRACBITS - test ebp,ebp - jns .notpowtwoloop - -.makefracpos: - add ebp,edx ;; frac is negative; make it positive - js .makefracpos - -.notpowtwoloop: - cmp ebp,edx ;; Reduce mod height - jl .writenonpowtwo - sub ebp,edx - jmp .notpowtwoloop - -.writenonpowtwo: - mov eax,ebp ;; eax = frac - sar eax,FRACBITS ;; Integer part. - mov bl,[esi + eax] ;; ebx = colormap + texel - add ebp,[dc_iscale] ;; frac += fracstep - cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent? - je .nextnonpowtwo ;; If so, advance. - movzx eax,byte [ebx] ;; Map through colormap - mov [edi],al ;; Write pixel -.nextnonpowtwo: - ;; dest += vid.width - add edi,[vid + viddef_s.width] - - sub ecx,1 - jnz .notpowtwoloop - -;; - -.done: - pop ebx ;; restore register variables - pop edi - pop esi - pop ebp ;; restore caller's stack frame pointer - ret - -;;---------------------------------------------------------------------- -;; R_DrawTranslucentColumnA_8 -;; -;; Vertical column texture drawer, with transparency. Replaces Doom2's -;; 'fuzz' effect, which was not so beautiful. -;; Transparency is always impressive in some way, don't know why... -;;---------------------------------------------------------------------- - -cglobal R_DrawTranslucentColumn_8_ASM -R_DrawTranslucentColumn_8_ASM: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov ebp,[dc_yl] - mov ebx,ebp - mov edi,[ylookup+ebx*4] - mov ebx,[dc_x] - add edi,[columnofs+ebx*4] ;; edi = dest -;; -;; pixelcount = yh - yl + 1 -;; - mov eax,[dc_yh] - inc eax - sub eax,ebp ;; pixel count - mov [pixelcount],eax ;; save for final pixel - jle near vtdone ;; nothing to scale -;; -;; frac = dc_texturemid - (centery-dc_yl)*fracstep; -;; - mov ecx,[dc_iscale] ;; fracstep - mov eax,[centery] - sub eax,ebp - imul eax,ecx - mov edx,[dc_texturemid] - sub edx,eax - mov ebx,edx - - shr ebx,16 ;; frac int. - and ebx,0x7f - shl edx,16 ;; y frac up - - mov ebp,ecx - shl ebp,16 ;; fracstep f. up - shr ecx,16 ;; fracstep i. ->cl - and cl,0x7f - push cx - mov ecx,edx - pop cx - mov edx,[dc_colormap] - mov esi,[dc_source] -;; -;; lets rock :) ! -;; - mov eax,[pixelcount] - shr eax,0x2 - test byte [pixelcount],0x3 - mov ch,al ;; quad count - mov eax,[dc_transmap] - je vt4quadloop -;; -;; do un-even pixel -;; - test byte [pixelcount],0x1 - je trf2 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add ecx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov dl,[edx] - mov [edi],dl -pf: add edi,0x12345678 -;; -;; do two non-quad-aligned pixels -;; -trf2: test byte [pixelcount],0x2 - je trf3 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add ecx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov dl,[edx] - mov [edi],dl -pg: add edi,0x12345678 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add ecx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov dl,[edx] - mov [edi],dl -ph: add edi,0x12345678 -;; -;; test if there was at least 4 pixels -;; -trf3: test ch,0xff ;; test quad count - je near vtdone - -;; -;; ebp : ystep frac. upper 24 bits -;; edx : y frac. upper 24 bits -;; ebx : y i. lower 7 bits, masked for index -;; ecx : ch = counter, cl = y step i. -;; eax : colormap aligned 256 -;; esi : source texture column -;; edi : dest screen -;; -vt4quadloop: - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [tystep],ebp -pi: add edi,0x12345678 - mov al,[edi] ;; fetch dest : index into colormap -pj: sub edi,0x12345678 - mov ebp,edi -pk: sub edi,0x12345678 - jmp short inloop -align 4 -vtquadloop: - add ecx,[tystep] - adc bl,cl -q1: add ebp,0x23456789 - and bl,0x7f - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov dl,[edx] - mov [edi],dl - mov al,[ebp] ;; fetch dest : index into colormap -inloop: - add ecx,[tystep] - adc bl,cl -q2: add edi,0x23456789 - and bl,0x7f - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov dl,[edx] - mov [ebp+0x0],dl - mov al,[edi] ;; fetch dest : index into colormap - - add ecx,[tystep] - adc bl,cl -q3: add ebp,0x23456789 - and bl,0x7f - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov dl,[edx] - mov [edi],dl - mov al,[ebp] ;; fetch dest : index into colormap - - add ecx,[tystep] - adc bl,cl -q4: add edi,0x23456789 - and bl,0x7f - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov dl,[edx] - mov [ebp],dl - mov al,[edi] ;; fetch dest : index into colormap - - dec ch - jne vtquadloop -vtdone: - pop ebx - pop edi - pop esi - pop ebp - ret - -;;---------------------------------------------------------------------- -;; R_DrawShadeColumn -;; -;; for smoke..etc.. test. -;;---------------------------------------------------------------------- -cglobal R_DrawShadeColumn_8_ASM -R_DrawShadeColumn_8_ASM: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx - -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov ebp,[dc_yl] - mov ebx,ebp - mov edi,[ylookup+ebx*4] - mov ebx,[dc_x] - add edi,[columnofs+ebx*4] ;; edi = dest -;; -;; pixelcount = yh - yl + 1 -;; - mov eax,[dc_yh] - inc eax - sub eax,ebp ;; pixel count - mov [pixelcount],eax ;; save for final pixel - jle near shdone ;; nothing to scale -;; -;; frac = dc_texturemid - (centery-dc_yl)*fracstep; -;; - mov ecx,[dc_iscale] ;; fracstep - mov eax,[centery] - sub eax,ebp - imul eax,ecx - mov edx,[dc_texturemid] - sub edx,eax - mov ebx,edx - shr ebx,16 ;; frac int. - and ebx,byte +0x7f - shl edx,16 ;; y frac up - - mov ebp,ecx - shl ebp,16 ;; fracstep f. up - shr ecx,16 ;; fracstep i. ->cl - and cl,0x7f - - mov esi,[dc_source] -;; -;; lets rock :) ! -;; - mov eax,[pixelcount] - mov dh,al - shr eax,2 - mov ch,al ;; quad count - mov eax,[colormaps] - test dh,3 - je sh4quadloop -;; -;; do un-even pixel -;; - test dh,0x1 - je shf2 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add edx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov [edi],dl -pl: add edi,0x12345678 -;; -;; do two non-quad-aligned pixels -;; -shf2: - test dh,0x2 - je shf3 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add edx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov [edi],dl -pm: add edi,0x12345678 - - mov ah,[esi+ebx] ;; fetch texel : colormap number - add edx,ebp - adc bl,cl - mov al,[edi] ;; fetch dest : index into colormap - and bl,0x7f - mov dl,[eax] - mov [edi],dl -pn: add edi,0x12345678 -;; -;; test if there was at least 4 pixels -;; -shf3: - test ch,0xff ;; test quad count - je near shdone - -;; -;; ebp : ystep frac. upper 24 bits -;; edx : y frac. upper 24 bits -;; ebx : y i. lower 7 bits, masked for index -;; ecx : ch = counter, cl = y step i. -;; eax : colormap aligned 256 -;; esi : source texture column -;; edi : dest screen -;; -sh4quadloop: - mov dh,0x7f ;; prep mask - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [tystep],ebp -po: add edi,0x12345678 - mov al,[edi] ;; fetch dest : index into colormap -pp: sub edi,0x12345678 - mov ebp,edi -pq: sub edi,0x12345678 - jmp short shinloop - -align 4 -shquadloop: - add edx,[tystep] - adc bl,cl - and bl,dh -q5: add ebp,0x12345678 - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [edi],dl - mov al,[ebp] ;; fetch dest : index into colormap -shinloop: - add edx,[tystep] - adc bl,cl - and bl,dh -q6: add edi,0x12345678 - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [ebp],dl - mov al,[edi] ;; fetch dest : index into colormap - - add edx,[tystep] - adc bl,cl - and bl,dh -q7: add ebp,0x12345678 - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [edi],dl - mov al,[ebp] ;; fetch dest : index into colormap - - add edx,[tystep] - adc bl,cl - and bl,dh -q8: add edi,0x12345678 - mov dl,[eax] - mov ah,[esi+ebx] ;; fetch texel : colormap number - mov [ebp],dl - mov al,[edi] ;; fetch dest : index into colormap - - dec ch - jne shquadloop - -shdone: - pop ebx ;; restore register variables - pop edi - pop esi - pop ebp ;; restore caller's stack frame pointer - ret - - -;; ======================================================================== -;; Rasterization of the segments of a LINEAR polygne textur of manire. -;; It is thus a question of interpolating coordinate them at the edges of texture in -;; the time that the X-coordinates minx/maxx for each line. -;; the argument ' dir' indicates which edges of texture are Interpol?: -;; 0: segments associs at edge TOP? and BOTTOM? (constant TY) -;; 1: segments associs at the LEFT and RIGHT edge (constant TX) -;; ======================================================================== -;; -;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir ); -;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 -;; -;; Pour dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, en effet TY est constant. -;; -;; Pour dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, en effet TX est constant. -;; -;; -;; Uses: extern struct rastery *_rastertab; -;; - -MINX EQU 0 -MAXX EQU 4 -TX1 EQU 8 -TY1 EQU 12 -TX2 EQU 16 -TY2 EQU 20 -RASTERY_SIZEOF EQU 24 - -cglobal rasterize_segment_tex_asm -rasterize_segment_tex_asm: - push ebp - mov ebp,esp - - sub esp,byte +0x8 ;; allocate the local variables - - push ebx - push esi - push edi - o16 mov ax,es - push eax - -;; #define DX [ebp-4] -;; #define TD [ebp-8] - - mov eax,[ebp+0xc] ;; y1 - mov ebx,[ebp+0x14] ;; y2 - cmp ebx,eax - je near .L_finished ;; special (y1==y2) segment horizontal, exit! - - jg near .L_rasterize_right - -;;rasterize_left: ;; one rasterize a segment LEFT of the polygne - - mov ecx,eax - sub ecx,ebx - inc ecx ;; y1-y2+1 - - mov eax,RASTERY_SIZEOF - mul ebx ;; * y2 - mov esi,[prastertab] - add esi,eax ;; point into rastertab[y2] - - mov eax,[ebp+0x8] ;; ARG1 - sub eax,[ebp+0x10] ;; ARG3 - shl eax,0x10 ;; ((x1-x2)<<PRE) ... - cdq - idiv ecx ;; dx = ... / (y1-y2+1) - mov [ebp-0x4],eax ;; DX - - mov eax,[ebp+0x18] ;; ARG5 - sub eax,[ebp+0x1c] ;; ARG6 - shl eax,0x10 - cdq - idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1) - mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1) - - mov eax,[ebp+0x10] ;; ARG3 - shl eax,0x10 ;; x = x2<<PRE - - mov ebx,[ebp+0x1c] ;; ARG6 - shl ebx,0x10 ;; tx = tx2<<PRE d0 - ;; ty = ty2<<PRE d1 - mov edx,[ebp+0x20] ;; ARG7 - shl edx,0x10 ;; ty = ty<<PRE d0 - ;; tx = tx<<PRE d1 - push ebp - mov edi,[ebp-0x4] ;; DX - cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ? - - mov ebp,[ebp-0x8] ;; TD - je .L_rleft_h_loop -;; -;; TY varies, TX is constant -;; -.L_rleft_v_loop: - mov [esi+MINX],eax ;; rastertab[y].minx = x - add ebx,ebp - mov [esi+TX1],edx ;; .tx1 = tx - add eax,edi - mov [esi+TY1],ebx ;; .ty1 = ty - - ;;addl DX, %eax // x += dx - ;;addl TD, %ebx // ty += tdy - - add esi,RASTERY_SIZEOF ;; next raster line into rastertab[] - dec ecx - jne .L_rleft_v_loop - pop ebp - jmp .L_finished -;; -;; TX varies, TY is constant -;; -.L_rleft_h_loop: - mov [esi+MINX],eax ;; rastertab[y].minx = x - add eax,edi - mov [esi+TX1],ebx ;; .tx1 = tx - add ebx,ebp - mov [esi+TY1],edx ;; .ty1 = ty - - ;;addl DX, %eax // x += dx - ;;addl TD, %ebx // tx += tdx - - add esi,RASTERY_SIZEOF ;; next raster line into rastertab[] - dec ecx - jne .L_rleft_h_loop - pop ebp - jmp .L_finished -;; -;; one rasterize a segment LINE of the polygne -;; -.L_rasterize_right: - mov ecx,ebx - sub ecx,eax - inc ecx ;; y2-y1+1 - - mov ebx,RASTERY_SIZEOF - mul ebx ;; * y1 - mov esi,[prastertab] - add esi,eax ;; point into rastertab[y1] - - mov eax,[ebp+0x10] ;; ARG3 - sub eax,[ebp+0x8] ;; ARG1 - shl eax,0x10 ;; ((x2-x1)<<PRE) ... - cdq - idiv ecx ;; dx = ... / (y2-y1+1) - mov [ebp-0x4],eax ;; DX - - mov eax,[ebp+0x1c] ;; ARG6 - sub eax,[ebp+0x18] ;; ARG5 - shl eax,0x10 - cdq - idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1) - mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1) - - mov eax,[ebp+0x8] ;; ARG1 - shl eax,0x10 ;; x = x1<<PRE - - mov ebx,[ebp+0x18] ;; ARG5 - shl ebx,0x10 ;; tx = tx1<<PRE d0 - ;; ty = ty1<<PRE d1 - mov edx,[ebp+0x20] ;; ARG7 - shl edx,0x10 ;; ty = ty<<PRE d0 - ;; tx = tx<<PRE d1 - push ebp - mov edi,[ebp-0x4] ;; DX - - cmp dword [ebp+0x24], 0 ;; direction ? - - mov ebp,[ebp-0x8] ;; TD - je .L_rright_h_loop -;; -;; TY varies, TX is constant -;; -.L_rright_v_loop: - - mov [esi+MAXX],eax ;; rastertab[y].maxx = x - add ebx,ebp - mov [esi+TX2],edx ;; .tx2 = tx - add eax,edi - mov [esi+TY2],ebx ;; .ty2 = ty - - ;;addl DX, %eax // x += dx - ;;addl TD, %ebx // ty += tdy - - add esi,RASTERY_SIZEOF - dec ecx - jne .L_rright_v_loop - - pop ebp - - jmp short .L_finished -;; -;; TX varies, TY is constant -;; -.L_rright_h_loop: - mov [esi+MAXX],eax ;; rastertab[y].maxx = x - add eax,edi - mov [esi+TX2],ebx ;; .tx2 = tx - add ebx,ebp - mov [esi+TY2],edx ;; .ty2 = ty - - ;;addl DX, %eax // x += dx - ;;addl TD, %ebx // tx += tdx - - add esi,RASTERY_SIZEOF - dec ecx - jne .L_rright_h_loop - - pop ebp - -.L_finished: - pop eax - o16 mov es,ax - pop edi - pop esi - pop ebx - - mov esp,ebp - pop ebp - ret diff --git a/src/tmap.s b/src/tmap.s deleted file mode 100644 index d98d82e25cedbea383b71beb122e7f250e12d765..0000000000000000000000000000000000000000 --- a/src/tmap.s +++ /dev/null @@ -1,1587 +0,0 @@ -// SONIC ROBO BLAST 2 -//----------------------------------------------------------------------------- -// Copyright (C) 1998-2000 by DooM Legacy Team. -// Copyright (C) 1999-2023 by Sonic Team Junior. -// -// This program is free software distributed under the -// terms of the GNU General Public License, version 2. -// See the 'LICENSE' file for more details. -//----------------------------------------------------------------------------- -/// \file tmap.s -/// \brief optimised drawing routines for span/column rendering - -// structures, must match the C structures! -#include "asm_defs.inc" - -// Rappel: seuls EAX, ECX, EDX peuvent �tre �cras�s librement. -// il faut sauver esi,edi, cd...gs - -/* Attention aux comparaisons! */ -/* */ -/* Intel_compare: */ -/* */ -/* cmp A,B // A-B , set flags */ -/* jg A_greater_than_B */ -/* */ -/* AT&T_compare: */ -/* */ -/* cmp A,B // B-A , set flags */ -/* jg B_greater_than_A */ -/* */ -/* (soustrait l'op�rande source DE l'op�rande destination, */ -/* comme sur Motorola! ) */ - -// RAPPEL: Intel -// SECTION:[BASE+INDEX*SCALE+DISP] -// devient SECTION:DISP(BASE,INDEX,SCALE) - -//---------------------------------------------------------------------- -// -// R_DrawColumn -// -// New optimised version 10-01-1998 by D.Fabrice and P.Boris -// TO DO: optimise it much farther... should take at most 3 cycles/pix -// once it's fixed, add code to patch the offsets so that it -// works in every screen width. -// -//---------------------------------------------------------------------- - - .data -#ifdef LINUX - .align 2 -#else - .align 4 -#endif -C(loopcount): .long 0 -C(pixelcount): .long 0 -C(tystep): .long 0 - -C(vidwidth): .long 0 //use this one out of the inner loops - //so you don't need to patch everywhere... - -#ifdef USEASM -#if !defined( LINUX) - .text -#endif -.globl C(ASM_PatchRowBytes) -C(ASM_PatchRowBytes): - pushl %ebp - movl %esp, %ebp // assure l'"adressabilit� du stack" - - movl ARG1, %edx // read first arg - movl %edx, C(vidwidth) - - // 1 * vidwidth - movl %edx,p1+2 - movl %edx,w1+2 //water - movl %edx,p1b+2 //sky - - movl %edx,p5+2 - movl %edx,sh5+2 //smokie test - - // 2 * vidwidth - addl ARG1,%edx - - movl %edx,p2+2 - movl %edx,w2+2 //water - movl %edx,p2b+2 //sky - - movl %edx,p6+2 - movl %edx,p7+2 - movl %edx,p8+2 - movl %edx,p9+2 - movl %edx,sh6+2 //smokie test - movl %edx,sh7+2 - movl %edx,sh8+2 - movl %edx,sh9+2 - - // 3 * vidwidth - addl ARG1,%edx - - movl %edx,p3+2 - movl %edx,w3+2 //water - movl %edx,p3b+2 //sky - - // 4 * vidwidth - addl ARG1,%edx - - movl %edx,p4+2 - movl %edx,w4+2 //water - movl %edx,p4b+2 //sky - - popl %ebp - ret - - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_DrawColumn_8) -C(R_DrawColumn_8): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// -// dest = ylookup[dc_yl] + columnofs[dc_x]; -// - movl C(dc_yl),%ebp - movl %ebp,%ebx - movl C(ylookup)(,%ebx,4),%edi - movl C(dc_x),%ebx - addl C(columnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle vdone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - shrl $16,%ebx // frac int. - andl $0x0000007f,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - andb $0x7f,%cl - - movl C(dc_source),%esi - -// -// lets rock :) ! -// - movl C(pixelcount),%eax - movb %al,%dh - shrl $2,%eax - movb %al,%ch // quad count - movl C(dc_colormap),%eax - testb $3,%dh - jz v4quadloop - -// -// do un-even pixel -// - testb $1,%dh - jz 2f - - movb (%esi,%ebx),%al // prep un-even loops - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - movb %dl,(%edi) // output pixel - addl C(vidwidth),%edi - -// -// do two non-quad-aligned pixels -// -2: - testb $2,%dh - jz 3f - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - movb %dl,(%edi) // output pixel - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - addl C(vidwidth),%edi - movb %dl,(%edi) // output pixel - - addl C(vidwidth),%edi - -// -// test if there was at least 4 pixels -// -3: - testb $0xFF,%ch // test quad count - jz vdone - -// -// ebp : ystep frac. upper 24 bits -// edx : y frac. upper 24 bits -// ebx : y i. lower 7 bits, masked for index -// ecx : ch = counter, cl = y step i. -// eax : colormap aligned 256 -// esi : source texture column -// edi : dest screen -// -v4quadloop: - movb $0x7f,%dh // prep mask -// .align 4 -vquadloop: - movb (%esi,%ebx),%al // prep loop - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - movb %dl,(%edi) // output pixel - andb $0x7f,%bl // mask 0-127 texture index - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p1: movb %dl,0x12345678(%edi) - andb $0x7f,%bl - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p2: movb %dl,2*0x12345678(%edi) - andb $0x7f,%bl - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p3: movb %dl,3*0x12345678(%edi) - andb $0x7f,%bl - -p4: addl $4*0x12345678,%edi - - decb %ch - jnz vquadloop - -vdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - -#ifdef HORIZONTALDRAW -// -------------------------------------------------------------------------- -// Horizontal Column Drawer Optimisation -// -------------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_DrawHColumn_8) -C(R_DrawHColumn_8): - pushl %ebp - pushl %esi - pushl %edi - pushl %ebx - -// -// dest = yhlookup[dc_x] + hcolumnofs[dc_yl]; -// - movl C(dc_x),%ebx - movl C(yhlookup)(,%ebx,4),%edi - movl C(dc_yl),%ebp - movl %ebp,%ebx - addl C(hcolumnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle vhdone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - shrl $16,%ebx // frac int. - andl $0x0000007f,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - andb $0x7f,%cl - - movl C(dc_source),%esi - -// -// lets rock :) ! -// - - movl C(pixelcount),%eax - movb %al,%dh - shrl $2,%eax - movb %al,%ch // quad count - - testb %ch, %ch - jz vhnearlydone - - movl C(dc_colormap),%eax - decl %edi //----- - -vhloop: - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - andb $0x7f,%bl - incl %edi //----- - movb (%eax),%dh - movb %dh,(%edi) //----- - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - incl %edi //----- - adcb %cl,%bl - movb (%eax),%dl - andb $0x7f,%bl - movb %dl,(%edi) //----- - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl -// shll $16,%edx - andb $0x7f,%bl - incl %edi //----- - movb (%eax),%dh - movb %dh,(%edi) //----- - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - incl %edi //----- - adcb %cl,%bl - movb (%eax),%dl - andb $0x7f,%bl - movb %dl,(%edi) -// movl %edx,(%edi) -// addl $4,%edi - - decb %ch - jnz vhloop - -vhnearlydone: -// movl C(pixelcount) - -vhdone: - popl %ebx - popl %edi - popl %esi - popl %ebp - ret - - -// -------------------------------------------------------------------------- -// Rotate a buffer 90 degree in clockwise order after horiz.col. draws -// -------------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_RotateBuffer) -C(R_RotateBuffer): - pushl %ebp - pushl %esi - pushl %edi - pushl %ebx - - - movl C(dc_source),%esi - movl C(dc_colormap),%edi - - - movb (%esi),%ah - addl $200,%esi - movb (%ebx),%al - addl $200,%ebx - bswap %eax - movb (%esi),%ah - addl $200,%esi - movb (%ebx),%al - addl $200,%ebx - movl %eax,(%edi) - addl $4,%edi - - - popl %ebx - popl %edi - popl %esi - popl %ebp - ret -#endif - -//---------------------------------------------------------------------- -//13-02-98: -// R_DrawSkyColumn : same as R_DrawColumn but: -// -// - wrap around 256 instead of 127. -// this is needed because we have a higher texture for mouselook, -// we need at least 200 lines for the sky. -// -// NOTE: the sky should never wrap, so it could use a faster method. -// for the moment, we'll still use a wrapping method... -// -// IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!! -// -//---------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_DrawSkyColumn_8) -C(R_DrawSkyColumn_8): - pushl %ebp - pushl %esi - pushl %edi - pushl %ebx - -// -// dest = ylookup[dc_yl] + columnofs[dc_x]; -// - movl C(dc_yl),%ebp - movl %ebp,%ebx - movl C(ylookup)(,%ebx,4),%edi - movl C(dc_x),%ebx - addl C(columnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle vskydone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - shrl $16,%ebx // frac int. - andl $0x000000ff,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - - movl C(dc_source),%esi - -// -// lets rock :) ! -// - movl C(pixelcount),%eax - movb %al,%dh - shrl $2,%eax - movb %al,%ch // quad count - movl C(dc_colormap),%eax - testb $3,%dh - jz v4skyquadloop - -// -// do un-even pixel -// - testb $1,%dh - jz 2f - - movb (%esi,%ebx),%al // prep un-even loops - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - movb %dl,(%edi) // output pixel - addl C(vidwidth),%edi - -// -// do two non-quad-aligned pixels -// -2: - testb $2,%dh - jz 3f - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - movb %dl,(%edi) // output pixel - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - addl C(vidwidth),%edi - movb %dl,(%edi) // output pixel - - addl C(vidwidth),%edi - -// -// test if there was at least 4 pixels -// -3: - testb $0xFF,%ch // test quad count - jz vskydone - -// -// ebp : ystep frac. upper 24 bits -// edx : y frac. upper 24 bits -// ebx : y i. lower 7 bits, masked for index -// ecx : ch = counter, cl = y step i. -// eax : colormap aligned 256 -// esi : source texture column -// edi : dest screen -// -v4skyquadloop: -// .align 4 -vskyquadloop: - movb (%esi,%ebx),%al // prep loop - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - movb %dl,(%edi) // output pixel - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p1b: movb %dl,0x12345678(%edi) - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p2b: movb %dl,2*0x12345678(%edi) - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -p3b: movb %dl,3*0x12345678(%edi) - -p4b: addl $4*0x12345678,%edi - - decb %ch - jnz vskyquadloop - -vskydone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - - - -//---------------------------------------------------------------------- -// -// R_DrawSpan -// -// Horizontal texture mapping -// -//---------------------------------------------------------------------- - - .data - -ystep: .long 0 -xstep: .long 0 -C(texwidth): .long 64 // texture width -#if !defined( LINUX) - .text -#endif -#ifdef LINUX - .align 2 -#else - .align 4 -#endif -.globl C(R_DrawSpan_8) -C(R_DrawSpan_8): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - - -// -// find loop count -// - movl C(ds_x2),%eax - incl %eax - subl C(ds_x1),%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - js hdone // nothing to scale - shrl $1,%eax // double pixel count - movl %eax,C(loopcount) - -// -// build composite position -// - movl C(ds_xfrac),%ebp - shll $10,%ebp - andl $0x0ffff0000,%ebp - movl C(ds_yfrac),%eax - shrl $6,%eax - andl $0x0ffff,%eax - movl C(ds_y),%edi - orl %eax,%ebp - - movl C(ds_source),%esi - -// -// calculate screen dest -// - - movl C(ylookup)(,%edi,4),%edi - movl C(ds_x1),%eax - addl C(columnofs)(,%eax,4),%edi - -// -// build composite step -// - movl C(ds_xstep),%ebx - shll $10,%ebx - andl $0x0ffff0000,%ebx - movl C(ds_ystep),%eax - shrl $6,%eax - andl $0x0ffff,%eax - orl %eax,%ebx - - //movl %eax,OFFSET hpatch1+2 // convice tasm to modify code... - movl %ebx,hpatch1+2 - //movl %eax,OFFSET hpatch2+2 // convice tasm to modify code... - movl %ebx,hpatch2+2 - movl %esi,hpatch3+2 - movl %esi,hpatch4+2 -// %eax aligned colormap -// %ebx aligned colormap -// %ecx,%edx scratch -// %esi virtual source -// %edi moving destination pointer -// %ebp frac - movl C(ds_colormap),%eax -// shld $22,%ebp,%ecx // begin calculating third pixel (y units) -// shld $6,%ebp,%ecx // begin calculating third pixel (x units) - movl %ebp,%ecx - addl %ebx,%ebp // advance frac pointer - shrw $10,%cx - roll $6,%ecx - andl $4095,%ecx // finish calculation for third pixel -// shld $22,%ebp,%edx // begin calculating fourth pixel (y units) -// shld $6,%ebp,%edx // begin calculating fourth pixel (x units) - movl %ebp,%edx - shrw $10,%dx - roll $6,%edx - addl %ebx,%ebp // advance frac pointer - andl $4095,%edx // finish calculation for fourth pixel - movl %eax,%ebx - movb (%esi,%ecx),%al // get first pixel - movb (%esi,%edx),%bl // get second pixel - testl $0x0fffffffe,C(pixelcount) - movb (%eax),%dl // color translate first pixel - -// jnz hdoubleloop // at least two pixels to map -// jmp hchecklast - -// movw $0xf0f0,%dx //see visplanes start - - jz hchecklast - movb (%ebx),%dh // color translate second pixel - movl C(loopcount),%esi -// .align 4 -hdoubleloop: -// shld $22,%ebp,%ecx // begin calculating third pixel (y units) -// shld $6,%ebp,%ecx // begin calculating third pixel (x units) - movl %ebp,%ecx - shrw $10,%cx - roll $6,%ecx -hpatch1: - addl $0x012345678,%ebp // advance frac pointer - movw %dx,(%edi) // write first pixel - andl $4095,%ecx // finish calculation for third pixel -// shld $22,%ebp,%edx // begin calculating fourth pixel (y units) -// shld $6,%ebp,%edx // begin calculating fourth pixel (x units) - movl %ebp,%edx - shrw $10,%dx - roll $6,%edx -hpatch3: - movb 0x012345678(%ecx),%al // get third pixel -// movb %bl,1(%edi) // write second pixel - andl $4095,%edx // finish calculation for fourth pixel -hpatch2: - addl $0x012345678,%ebp // advance frac pointer -hpatch4: - movb 0x012345678(%edx),%bl // get fourth pixel - movb (%eax),%dl // color translate third pixel - addl $2,%edi // advance to third pixel destination - decl %esi // done with loop? - movb (%ebx),%dh // color translate fourth pixel - jnz hdoubleloop - -// check for final pixel -hchecklast: - testl $1,C(pixelcount) - jz hdone - movb %dl,(%edi) // write final pixel - -hdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - - -//.endif - - -//---------------------------------------------------------------------- -// R_DrawTransColumn -// -// Vertical column texture drawer, with transparency. Replaces Doom2's -// 'fuzz' effect, which was not so beautiful. -// Transparency is always impressive in some way, don't know why... -//---------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif - -.globl C(R_DrawTranslucentColumn_8) -C(R_DrawTranslucentColumn_8): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// -// dest = ylookup[dc_yl] + columnofs[dc_x]; -// - movl C(dc_yl),%ebp - movl %ebp,%ebx - movl C(ylookup)(,%ebx,4),%edi - movl C(dc_x),%ebx - addl C(columnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle vtdone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - - shrl $16,%ebx // frac int. - andl $0x0000007f,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - andb $0x7f,%cl - pushw %cx - movl %edx,%ecx - popw %cx - movl C(dc_colormap),%edx - movl C(dc_source),%esi - -// -// lets rock :) ! -// - movl C(pixelcount),%eax - shrl $2,%eax - testb $0x03,C(pixelcount) - movb %al,%ch // quad count - movl C(dc_transmap),%eax - jz vt4quadloop -// -// do un-even pixel -// - testb $1,C(pixelcount) - jz 2f - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%ecx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb (%edx), %dl // use colormap now ! - movb %dl,(%edi) - addl C(vidwidth),%edi -// -// do two non-quad-aligned pixels -// -2: - testb $2,C(pixelcount) - jz 3f - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%ecx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb (%edx), %dl // use colormap now ! - movb %dl,(%edi) - addl C(vidwidth),%edi - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%ecx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb (%edx), %dl // use colormap now ! - movb %dl,(%edi) - addl C(vidwidth),%edi - -// -// test if there was at least 4 pixels -// -3: - testb $0xFF,%ch // test quad count - jz vtdone - -// -// tystep : ystep frac. upper 24 bits -// edx : upper 24 bit : colomap -// dl : tmp pixel to write -// ebx : y i. lower 7 bits, masked for index -// ecx : y frac. upper 16 bits -// ecx : ch = counter, cl = y step i. -// eax : transmap aligned 65535 (upper 16 bit) -// ah : background pixel (from the screen buffer) -// al : foreground pixel (from the texture) -// esi : source texture column -// ebp,edi : dest screen -// -vt4quadloop: - movb (%esi,%ebx),%ah // fetch texel : colormap number -p5: movb 0x12345678(%edi),%al // fetch dest : index into colormap - - movl %ebp,C(tystep) - movl %edi,%ebp - subl C(vidwidth),%edi - jmp inloop -// .align 4 -vtquadloop: - addl C(tystep),%ecx - adcb %cl,%bl -p6: addl $2*0x12345678,%ebp - andb $0x7f,%bl - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb (%edx), %dl // use colormap now ! - movb %dl,(%edi) - movb (%ebp),%al // fetch dest : index into colormap -inloop: - addl C(tystep),%ecx - adcb %cl,%bl -p7: addl $2*0x12345678,%edi - andb $0x7f,%bl - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb (%edx), %dl // use colormap now ! - movb %dl,(%ebp) - movb (%edi),%al // fetch dest : index into colormap - - addl C(tystep),%ecx - adcb %cl,%bl -p8: addl $2*0x12345678,%ebp - andb $0x7f,%bl - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb (%edx), %dl // use colormap now ! - movb %dl,(%edi) - movb (%ebp),%al // fetch dest : index into colormap - - addl C(tystep),%ecx - adcb %cl,%bl -p9: addl $2*0x12345678,%edi - andb $0x7f,%bl - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb (%edx), %dl // use colormap now ! - movb %dl,(%ebp) - movb (%edi),%al // fetch dest : index into colormap - - decb %ch - jnz vtquadloop - -vtdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - -#endif // ifdef USEASM - - - -//---------------------------------------------------------------------- -// R_DrawShadeColumn -// -// for smoke..etc.. test. -//---------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_DrawShadeColumn_8) -C(R_DrawShadeColumn_8): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// -// dest = ylookup[dc_yl] + columnofs[dc_x]; -// - movl C(dc_yl),%ebp - movl %ebp,%ebx - movl C(ylookup)(,%ebx,4),%edi - movl C(dc_x),%ebx - addl C(columnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle shdone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - shrl $16,%ebx // frac int. - andl $0x0000007f,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - andb $0x7f,%cl - - movl C(dc_source),%esi - -// -// lets rock :) ! -// - movl C(pixelcount),%eax - movb %al,%dh - shrl $2,%eax - movb %al,%ch // quad count - movl C(colormaps),%eax - testb $0x03,%dh - jz sh4quadloop - -// -// do un-even pixel -// - testb $1,%dh - jz 2f - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%edx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb %dl,(%edi) - addl C(vidwidth),%edi - -// -// do two non-quad-aligned pixels -// -2: - testb $2,%dh - jz 3f - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%edx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb %dl,(%edi) - addl C(vidwidth),%edi - - movb (%esi,%ebx),%ah // fetch texel : colormap number - addl %ebp,%edx - adcb %cl,%bl - movb (%edi),%al // fetch dest : index into colormap - andb $0x7f,%bl - movb (%eax),%dl - movb %dl,(%edi) - addl C(vidwidth),%edi - -// -// test if there was at least 4 pixels -// -3: - testb $0xFF,%ch // test quad count - jz shdone - -// -// ebp : ystep frac. upper 24 bits -// edx : y frac. upper 24 bits -// ebx : y i. lower 7 bits, masked for index -// ecx : ch = counter, cl = y step i. -// eax : colormap aligned 256 -// esi : source texture column -// edi : dest screen -// -sh4quadloop: - movb $0x7f,%dh // prep mask - - movb (%esi,%ebx),%ah // fetch texel : colormap number -sh5: movb 0x12345678(%edi),%al // fetch dest : index into colormap - - movl %ebp,C(tystep) - movl %edi,%ebp - subl C(vidwidth),%edi - jmp shinloop -// .align 4 -shquadloop: - addl C(tystep),%edx - adcb %cl,%bl - andb %dh,%bl -sh6: addl $2*0x12345678,%ebp - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb %dl,(%edi) - movb (%ebp),%al // fetch dest : index into colormap -shinloop: - addl C(tystep),%edx - adcb %cl,%bl - andb %dh,%bl -sh7: addl $2*0x12345678,%edi - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb %dl,(%ebp) - movb (%edi),%al // fetch dest : index into colormap - - addl C(tystep),%edx - adcb %cl,%bl - andb %dh,%bl -sh8: addl $2*0x12345678,%ebp - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb %dl,(%edi) - movb (%ebp),%al // fetch dest : index into colormap - - addl C(tystep),%edx - adcb %cl,%bl - andb %dh,%bl -sh9: addl $2*0x12345678,%edi - movb (%eax),%dl - movb (%esi,%ebx),%ah // fetch texel : colormap number - movb %dl,(%ebp) - movb (%edi),%al // fetch dest : index into colormap - - decb %ch - jnz shquadloop - -shdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - - - -//---------------------------------------------------------------------- -// -// R_DrawWaterColumn : basically it's just a copy of R_DrawColumn, -// but it uses dc_colormap from dc_yl to dc_yw-1 -// then it uses dc_wcolormap from dc_yw to dc_yh -// -// Thus, the 'underwater' part of the walls is remapped to 'water-like' -// colors. -// -//---------------------------------------------------------------------- - -#ifdef LINUX - .align 2 -#else - .align 5 -#endif -.globl C(R_DrawWaterColumn) -C(R_DrawWaterColumn): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// -// dest = ylookup[dc_yl] + columnofs[dc_x]; -// - movl C(dc_yl),%ebp - movl %ebp,%ebx - movl C(ylookup)(,%ebx,4),%edi - movl C(dc_x),%ebx - addl C(columnofs)(,%ebx,4),%edi // edi = dest - -// -// pixelcount = yh - yl + 1 -// - movl C(dc_yh),%eax - incl %eax - subl %ebp,%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle wdone // nothing to scale - -// -// frac = dc_texturemid - (centery-dc_yl)*fracstep; -// - movl C(dc_iscale),%ecx // fracstep - movl C(centery),%eax - subl %ebp,%eax - imul %ecx,%eax - movl C(dc_texturemid),%edx - subl %eax,%edx - movl %edx,%ebx - shrl $16,%ebx // frac int. - andl $0x0000007f,%ebx - shll $16,%edx // y frac up - - movl %ecx,%ebp - shll $16,%ebp // fracstep f. up - shrl $16,%ecx // fracstep i. ->cl - andb $0x7f,%cl - - movl C(dc_source),%esi - -// -// lets rock :) ! -// - movl C(pixelcount),%eax - movb %al,%dh - shrl $2,%eax - movb %al,%ch // quad count - movl C(dc_wcolormap),%eax - testb $3,%dh - jz w4quadloop - -// -// do un-even pixel -// - testb $1,%dh - jz 2f - - movb (%esi,%ebx),%al // prep un-even loops - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - movb %dl,(%edi) // output pixel - addl C(vidwidth),%edi - -// -// do two non-quad-aligned pixels -// -2: - testb $2,%dh - jz 3f - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - movb %dl,(%edi) // output pixel - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - andb $0x7f,%bl // mask 0-127 texture index - addl C(vidwidth),%edi - movb %dl,(%edi) // output pixel - - addl C(vidwidth),%edi - -// -// test if there was at least 4 pixels -// -3: - testb $0xFF,%ch // test quad count - jz wdone - -// -// ebp : ystep frac. upper 24 bits -// edx : y frac. upper 24 bits -// ebx : y i. lower 7 bits, masked for index -// ecx : ch = counter, cl = y step i. -// eax : colormap aligned 256 -// esi : source texture column -// edi : dest screen -// -w4quadloop: - movb $0x7f,%dh // prep mask -// .align 4 -wquadloop: - movb (%esi,%ebx),%al // prep loop - addl %ebp,%edx // ypos f += ystep f - adcb %cl,%bl // ypos i += ystep i - movb (%eax),%dl // colormap texel - movb %dl,(%edi) // output pixel - andb $0x7f,%bl // mask 0-127 texture index - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -w1: movb %dl,0x12345678(%edi) - andb $0x7f,%bl - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -w2: movb %dl,2*0x12345678(%edi) - andb $0x7f,%bl - - movb (%esi,%ebx),%al // fetch source texel - addl %ebp,%edx - adcb %cl,%bl - movb (%eax),%dl -w3: movb %dl,3*0x12345678(%edi) - andb $0x7f,%bl - -w4: addl $4*0x12345678,%edi - - decb %ch - jnz wquadloop - -wdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - - - - - - - -//---------------------------------------------------------------------- -// -// R_DrawSpanNoWrap -// -// Horizontal texture mapping, does not remap colors, -// neither needs to wrap around the source texture. -// -// Thus, a special optimisation can be used... -// -//---------------------------------------------------------------------- - - .data - -advancetable: .long 0, 0 -#if !defined( LINUX) - .text -#endif -#ifdef LINUX - .align 2 -#else - .align 4 -#endif -.globl C(R_DrawSpanNoWrap) -C(R_DrawSpanNoWrap): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - -// -// find loop count -// - - movl C(ds_x2),%eax - incl %eax - subl C(ds_x1),%eax // pixel count - movl %eax,C(pixelcount) // save for final pixel - jle htvdone // nothing to scale -// shrl $1,%eax // double pixel count -// movl %eax,C(loopcount) - -// -// calculate screen dest -// - - movl C(ds_y),%edi //full destination start address - -// -// set up advancetable -// - - movl C(ds_xstep),%ebp - movl C(ds_ystep),%ecx - movl %ecx,%eax - movl %ebp,%edx - sarl $16,%edx // xstep >>= 16; - movl C(vidwidth),%ebx - sarl $16,%eax // ystep >>= 16; - jz 0f - imull %ebx,%eax // (ystep >> 16) * texwidth; -0: - addl %edx,%eax // add in xstep - // (ystep >> 16) * texwidth + (xstep >> 16); - - movl %eax,advancetable+4 // advance base in y - addl %ebx,%eax // ((ystep >> 16) + 1) * texwidth + - // (xstep >> 16); - movl %eax,advancetable // advance extra in y - - shll $16,%ebp // left-justify xstep fractional part - movl %ebp,xstep - shll $16,%ecx // left-justify ystep fractional part - movl %ecx,ystep - -// -// calculate the texture starting address -// - movl C(ds_source),%esi // texture source - - movl C(ds_yfrac),%eax - movl %eax,%edx - sarl $16,%eax - movl C(ds_xfrac),%ecx - imull %ebx,%eax // (yfrac >> 16) * texwidth - movl %ecx,%ebx - sarl $16,%ecx - movl %ecx,%ebp - addl %eax,%ebp // source = (xfrac >> 16) + - // ((yfrac >> 16) * texwidth); - -// -// esi : texture source -// edi : screen dest -// eax : colormap aligned on 256 boundary, hehehe... -// ebx : xfrac << 16 -// ecx : used in loop, contains either 0 or -1, *4, offset into advancetable -// edx : yfrac << 16 -// ebp : offset into texture -// - - shll $16,%edx // yfrac upper word, lower byte will be used - movl C(ds_colormap),%eax - shll $16,%ebx // xfrac upper word, lower unused - - movl C(pixelcount),%ecx - shrl $2,%ecx - movb %cl,%dh // quad pixels count - - movl C(pixelcount),%ecx - andl $3,%ecx - jz htvquadloop // pixelcount is multiple of 4 - decl %ecx - jz 1f - decl %ecx - jz 2f - -// -// do one to three pixels first -// - addl ystep,%edx // yfrac += ystep - sbbl %ecx,%ecx // turn carry into 0 or -1 if set - movb (%esi,%ebp),%al // get texture pixel - addl xstep,%ebx // xfrac += xstep -// movb (%eax),%dl // pixel goes through colormap - adcl advancetable+4(,%ecx,4),%ebp // advance source - movb %al,(%edi) // write pixel dest - - incl %edi - -2: - addl ystep,%edx // yfrac += ystep - sbbl %ecx,%ecx // turn carry into 0 or -1 if set - movb (%esi,%ebp),%al // get texture pixel - addl xstep,%ebx // xfrac += xstep -// movb (%eax),%dl // pixel goes through colormap - adcl advancetable+4(,%ecx,4),%ebp // advance source - movb %al,(%edi) // write pixel dest - - incl %edi - -1: - addl ystep,%edx // yfrac += ystep - sbbl %ecx,%ecx // turn carry into 0 or -1 if set - movb (%esi,%ebp),%al // get texture pixel - addl xstep,%ebx // xfrac += xstep -// movb (%eax),%dl // pixel goes through colormap - adcl advancetable+4(,%ecx,4),%ebp // advance source - movb %al,(%edi) // write pixel dest - - incl %edi - -// -// test if there was at least 4 pixels -// - testb $0xFF,%dh - jz htvdone - -// -// two pixels per loop -// U -// V -htvquadloop: - addl ystep,%edx // yfrac += ystep - sbbl %ecx,%ecx // turn carry into 0 or -1 if set - movb (%esi,%ebp),%al // get texture pixel - addl xstep,%ebx // xfrac += xstep -// movb (%eax),%dl // pixel goes through colormap - adcl advancetable+4(,%ecx,4),%ebp // advance source - movb %al,(%edi) // write pixel dest - - addl ystep,%edx - sbbl %ecx,%ecx - movb (%esi,%ebp),%al - addl xstep,%ebx -// movb (%eax),%dl - adcl advancetable+4(,%ecx,4),%ebp - movb %al,1(%edi) - - addl ystep,%edx - sbbl %ecx,%ecx - movb (%esi,%ebp),%al - addl xstep,%ebx -// movb (%eax),%dl - adcl advancetable+4(,%ecx,4),%ebp - movb %al,2(%edi) - - addl ystep,%edx - sbbl %ecx,%ecx - movb (%esi,%ebp),%al - addl xstep,%ebx -// movb (%eax),%dl - adcl advancetable+4(,%ecx,4),%ebp - movb %al,3(%edi) - - addl $4, %edi - incl %ecx //dummy - - decb %dh - jnz htvquadloop // paire dans V-pipe - -htvdone: - popl %ebx // restore register variables - popl %edi - popl %esi - popl %ebp // restore caller's stack frame pointer - ret - - -//.endif - -#ifdef HORIZONTALDRAW -// void R_RotateBuffere (void) - -#ifdef LINUX - .align 2 -#else - .align 4 -#endif -.globl C(R_RotateBufferasm) -C(R_RotateBufferasm): - pushl %ebp // preserve caller's stack frame pointer - pushl %esi // preserve register variables - pushl %edi - pushl %ebx - - movl C(dc_source),%esi - movl C(dc_colormap),%edi - - movl $200,%edx -ra2: - movl $40,%ecx -ra: - movb -2*200(%esi),%al - movb -6*200(%esi),%bl - movb -3*200(%esi),%ah - movb -7*200(%esi),%bh - shll $16,%eax - shll $16,%ebx - movb (%esi),%al - movb -4*200(%esi),%bl - movb -1*200(%esi),%ah - movb -5*200(%esi),%bh - movl %eax,(%edi) - subl $8*200,%esi - movl %ebx,4(%edi) - addl $8,%edi - decl %ecx - jnz ra - - addl $320*200+1,%esi //32*480 passe a la ligne suivante -// addl 320-32,%edi - - decl %edx - jnz ra2 - - pop %ebp // preserve caller's stack frame pointer - pop %esi // preserve register variables - pop %edi - pop %ebx - ret -#endif diff --git a/src/tmap_asm.s b/src/tmap_asm.s deleted file mode 100644 index d8967178cdf28e3b9bedbda863232ef0bf0978d4..0000000000000000000000000000000000000000 --- a/src/tmap_asm.s +++ /dev/null @@ -1,322 +0,0 @@ -// SONIC ROBO BLAST 2 -//----------------------------------------------------------------------------- -// Copyright (C) 1998-2000 by DooM Legacy Team. -// Copyright (C) 1999-2023 by Sonic Team Junior. -// -// This program is free software distributed under the -// terms of the GNU General Public License, version 2. -// See the 'LICENSE' file for more details. -//----------------------------------------------------------------------------- -/// \file tmap_asm.s -/// \brief ??? - -//.comm _dc_colormap,4 -//.comm _dc_x,4 -//.comm _dc_yl,4 -//.comm _dc_yh,4 -//.comm _dc_iscale,4 -//.comm _dc_texturemid,4 -//.comm _dc_source,4 -//.comm _ylookup,4 -//.comm _columnofs,4 -//.comm _loopcount,4 -//.comm _pixelcount,4 -.data -_pixelcount: -.long 0x00000000 -_loopcount: -.long 0x00000000 -.align 8 -_mmxcomm: -.long 0x00000000 -.text - - .align 4 -.globl _R_DrawColumn8_NOMMX -_R_DrawColumn8_NOMMX: - pushl %ebp - pushl %esi - pushl %edi - pushl %ebx - movl _dc_yl,%edx - movl _dc_yh,%eax - subl %edx,%eax - leal 1(%eax),%ebx - testl %ebx,%ebx - jle rdc8ndone - movl _dc_x,%eax - movl _ylookup, %edi - movl (%edi,%edx,4),%esi - movl _columnofs, %edi - addl (%edi,%eax,4),%esi - movl _dc_iscale,%edi - movl %edx,%eax - imull %edi,%eax - movl _dc_texturemid,%ecx - addl %eax,%ecx - - movl _dc_source,%ebp - xorl %edx, %edx - subl $0x12345678, %esi -.globl rdc8nwidth1 -rdc8nwidth1: - .align 4,0x90 -rdc8nloop: - movl %ecx,%eax - shrl $16,%eax - addl %edi,%ecx - andl $127,%eax - addl $0x12345678,%esi -.globl rdc8nwidth2 -rdc8nwidth2: - movb (%eax,%ebp),%dl - movl _dc_colormap,%eax - movb (%eax,%edx),%al - movb %al,(%esi) - decl %ebx - jne rdc8nloop -rdc8ndone: - popl %ebx - popl %edi - popl %esi - popl %ebp - ret - -// -// Optimised specifically for P54C/P55C (aka Pentium with/without MMX) -// By ES 1998/08/01 -// - -.globl _R_DrawColumn_8_Pentium -_R_DrawColumn_8_Pentium: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl _dc_yl,%eax // Top pixel - movl _dc_yh,%ebx // Bottom pixel - movl _ylookup, %edi - movl (%edi,%ebx,4),%ecx - subl %eax,%ebx // ebx=number of pixels-1 - jl rdc8pdone // no pixel to draw, done - jnz rdc8pmany - movl _dc_x,%edx // Special case: only one pixel - movl _columnofs, %edi - addl (%edi,%edx,4),%ecx // dest pixel at (%ecx) - movl _dc_iscale,%esi - imull %esi,%eax - movl _dc_texturemid,%edi - addl %eax,%edi // texture index in edi - movl _dc_colormap,%edx - shrl $16, %edi - movl _dc_source,%ebp - andl $127,%edi - movb (%edi,%ebp),%dl // read texture pixel - movb (%edx),%al // lookup for light - movb %al,0(%ecx) // write it - jmp rdc8pdone // done! -.align 4, 0x90 -rdc8pmany: // draw >1 pixel - movl _dc_x,%edx - movl _columnofs, %edi - movl (%edi,%edx,4),%edx - leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom -.globl rdc8pwidth5 -rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH - movl _dc_iscale,%edx // edx = fracstep - imull %edx,%eax - shll $9, %edx // fixme: Should get 7.25 fix as input - movl _dc_texturemid,%ecx - addl %eax,%ecx // ecx = frac - movl _dc_colormap,%eax // eax = lighting/special effects LUT - shll $9, %ecx - movl _dc_source,%esi // esi = source ptr - - imull $0x12345678, %ebx // ebx = negative offset to pixel -.globl rdc8pwidth6 -rdc8pwidth6: // DeadBeef = -SCREENWIDTH - -// Begin the calculation of the two first pixels - leal (%ecx, %edx), %ebp - shrl $25, %ecx - movb (%esi, %ecx), %al - leal (%edx, %ebp), %ecx - shrl $25, %ebp - movb (%eax), %dl - -// The main loop -rdc8ploop: - movb (%esi,%ebp), %al // load 1 - leal (%ecx, %edx), %ebp // calc frac 3 - - shrl $25, %ecx // shift frac 2 - movb %dl, 0x12345678(%edi, %ebx)// store 0 -.globl rdc8pwidth1 -rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH - - movb (%eax), %al // lookup 1 - - movb %al, 0x12345678(%edi, %ebx)// store 1 -.globl rdc8pwidth2 -rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH - movb (%esi, %ecx), %al // load 2 - - leal (%ebp, %edx), %ecx // calc frac 4 - - shrl $25, %ebp // shift frac 3 - movb (%eax), %dl // lookup 2 - - addl $0x12345678, %ebx // counter -.globl rdc8pwidth3 -rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH - jl rdc8ploop // loop - -// End of loop. Write extra pixel or just exit. - jnz rdc8pdone - movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel -.globl rdc8pwidth4 -rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH - -rdc8pdone: - - popl %edi - popl %esi - popl %ebx - popl %ebp - ret - -// -// MMX asm version, optimised for K6 -// By ES 1998/07/05 -// - -.globl _R_DrawColumn_8_K6_MMX -_R_DrawColumn_8_K6_MMX: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8 - andl $7,%eax - addl $8,%eax - movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead - subl %eax,%esp - - movl _dc_yl,%edx // Top pixel - movl _dc_yh,%ebx // Bottom pixel - movl _ylookup, %edi - movl (%edi,%ebx,4),%ecx - subl %edx,%ebx // ebx=number of pixels-1 - jl 0x12345678 // no pixel to draw, done -.globl rdc8moffs1 -rdc8moffs1: - jnz rdc8mmany - movl _dc_x,%eax // Special case: only one pixel - movl _columnofs, %edi - addl (%edi,%eax,4),%ecx // dest pixel at (%ecx) - movl _dc_iscale,%esi - imull %esi,%edx - movl _dc_texturemid,%edi - addl %edx,%edi // texture index in edi - movl _dc_colormap,%edx - shrl $16, %edi - movl _dc_source,%ebp - andl $127,%edi - movb (%edi,%ebp),%dl // read texture pixel - movb (%edx),%al // lookup for light - movb %al,0(%ecx) // write it - jmp rdc8mdone // done! -.globl rdc8moffs2 -rdc8moffs2: -.align 4, 0x90 -rdc8mmany: // draw >1 pixel - movl _dc_x,%eax - movl _columnofs, %edi - movl (%edi,%eax,4),%eax - leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom -.globl rdc8mwidth3 -rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH - movl _dc_iscale,%ecx // ecx = fracstep - imull %ecx,%edx - shll $9, %ecx // fixme: Should get 7.25 fix as input - movl _dc_texturemid,%eax - addl %edx,%eax // eax = frac - movl _dc_colormap,%edx // edx = lighting/special effects LUT - shll $9, %eax - leal (%ecx, %ecx), %edi - movl _dc_source,%ebp // ebp = source ptr - movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs - - imull $0x12345678, %ebx // ebx = negative offset to pixel -.globl rdc8mwidth5 -rdc8mwidth5: // DeadBeef = -SCREENWIDTH - - movl %edi, 4(%esp) - leal (%eax, %ecx), %edi - movq 0(%esp), %mm1 // fracstep:fracstep in mm1 - movl %eax, 0(%esp) - shrl $25, %eax - movl %edi, 4(%esp) - movzbl (%ebp, %eax), %eax - movq 0(%esp), %mm0 // frac:frac in mm0 - - paddd %mm1, %mm0 - shrl $25, %edi - movq %mm0, %mm2 - psrld $25, %mm2 // texture index in mm2 - paddd %mm1, %mm0 - movq %mm2, 0(%esp) - -.globl rdc8mloop -rdc8mloop: // The main loop - movq %mm0, %mm2 // move 4-5 to temp reg - movzbl (%ebp, %edi), %edi // read 1 - - psrld $25, %mm2 // shift 4-5 - movb (%edx,%eax), %cl // lookup 0 - - movl 0(%esp), %eax // load 2 - addl $0x12345678, %ebx // counter -.globl rdc8mwidth2 -rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH - - movb %cl, (%esi, %ebx) // write 0 - movb (%edx,%edi), %ch // lookup 1 - - movb %ch, 0x12345678(%esi, %ebx) // write 1 -.globl rdc8mwidth1 -rdc8mwidth1: // DeadBeef = SCREENWIDTH - movl 4(%esp), %edi // load 3 - - paddd %mm1, %mm0 // frac 6-7 - movzbl (%ebp, %eax), %eax // lookup 2 - - movq %mm2, 0(%esp) // store texture index 4-5 - jl rdc8mloop - - jnz rdc8mno_odd - movb (%edx,%eax), %cl // write the last odd pixel - movb %cl, 0x12345678(%esi) -.globl rdc8mwidth4 -rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH -rdc8mno_odd: - -.globl rdc8mdone -rdc8mdone: - emms - - addl _mmxcomm, %esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret - -// Need some extra space to align run-time -.globl R_DrawColumn_8_K6_MMX_end -R_DrawColumn_8_K6_MMX_end: -nop;nop;nop;nop;nop;nop;nop;nop; -nop;nop;nop;nop;nop;nop;nop;nop; -nop;nop;nop;nop;nop;nop;nop;nop; -nop;nop;nop;nop;nop;nop;nop; diff --git a/src/tmap_mmx.nas b/src/tmap_mmx.nas deleted file mode 100644 index a45667e23d539997193e0df23862dba71458c6f6..0000000000000000000000000000000000000000 --- a/src/tmap_mmx.nas +++ /dev/null @@ -1,674 +0,0 @@ -;; SONIC ROBO BLAST 2 -;;----------------------------------------------------------------------------- -;; Copyright (C) 1998-2000 by DOSDOOM. -;; Copyright (C) 2010-2023 by Sonic Team Junior. -;; -;; This program is free software distributed under the -;; terms of the GNU General Public License, version 2. -;; See the 'LICENSE' file for more details. -;;----------------------------------------------------------------------------- -;; FILE: -;; tmap_mmx.nas -;; DESCRIPTION: -;; Assembler optimised rendering code for software mode, using SIMD -;; instructions. -;; Draw wall columns. - - -[BITS 32] - -%define FRACBITS 16 -%define TRANSPARENTPIXEL 255 - -%ifdef LINUX -%macro cextern 1 -[extern %1] -%endmacro - -%macro cglobal 1 -[global %1] -%endmacro - -%else -%macro cextern 1 -%define %1 _%1 -[extern %1] -%endmacro - -%macro cglobal 1 -%define %1 _%1 -[global %1] -%endmacro - -%endif - - -; The viddef_s structure. We only need the width field. -struc viddef_s - resb 12 -.width: resb 4 - resb 44 -endstruc - - -;; externs -;; columns -cextern dc_colormap -cextern dc_x -cextern dc_yl -cextern dc_yh -cextern dc_iscale -cextern dc_texturemid -cextern dc_texheight -cextern dc_source -cextern dc_hires -cextern centery -cextern centeryfrac -cextern dc_transmap - -cextern R_DrawColumn_8_ASM -cextern R_Draw2sMultiPatchColumn_8_ASM - -;; spans -cextern nflatshiftup -cextern nflatxshift -cextern nflatyshift -cextern nflatmask -cextern ds_xfrac -cextern ds_yfrac -cextern ds_xstep -cextern ds_ystep -cextern ds_x1 -cextern ds_x2 -cextern ds_y -cextern ds_source -cextern ds_colormap - -cextern ylookup -cextern columnofs -cextern vid - -[SECTION .data] - -nflatmask64 dq 0 - - -[SECTION .text] - -;;---------------------------------------------------------------------- -;; -;; R_DrawColumn : 8bpp column drawer -;; -;; MMX column drawer. -;; -;;---------------------------------------------------------------------- -;; eax = accumulator -;; ebx = colormap -;; ecx = count -;; edx = accumulator -;; esi = source -;; edi = dest -;; ebp = vid.width -;; mm0 = accumulator -;; mm1 = heightmask, twice -;; mm2 = 2 * fracstep, twice -;; mm3 = pair of consecutive fracs -;;---------------------------------------------------------------------- - - -cglobal R_DrawColumn_8_MMX -R_DrawColumn_8_MMX: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx - -;; -;; Our algorithm requires that the texture height be a power of two. -;; If not, fall back to the non-MMX drawer. -;; -.texheightcheck: - mov edx, [dc_texheight] - sub edx, 1 ;; edx = heightmask - test edx, [dc_texheight] - jnz near .usenonMMX - - mov ebp, edx ;; Keep a copy of heightmask in a - ;; GPR for the time being. - -;; -;; Fill mm1 with heightmask -;; - movd mm1, edx ;; low dword = heightmask - punpckldq mm1, mm1 ;; copy low dword to high dword - -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov eax, [dc_yl] - mov edi, [ylookup+eax*4] - mov ebx, [dc_x] - add edi, [columnofs+ebx*4] ;; edi = dest - - -;; -;; pixelcount = yh - yl + 1 -;; - mov ecx, [dc_yh] - add ecx, 1 - sub ecx, eax ;; pixel count - jle near .done ;; nothing to scale - -;; -;; fracstep = dc_iscale; -;; - movd mm2, [dc_iscale] ;; fracstep in low dword - punpckldq mm2, mm2 ;; copy to high dword - - mov ebx, [dc_colormap] - mov esi, [dc_source] - -;; -;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep)); -;; - ;; eax == dc_yl already - shl eax, FRACBITS - sub eax, [centeryfrac] - imul dword [dc_iscale] - shrd eax, edx, FRACBITS - add eax, [dc_texturemid] - -;; -;; if (dc_hires) frac = 0; -;; - test byte [dc_hires], 0x01 - jz .mod2 - xor eax, eax - - -;; -;; Do mod-2 pixel. -;; -.mod2: - test ecx, 1 - jz .pairprepare - mov edx, eax ;; edx = frac - add eax, [dc_iscale] ;; eax += fracstep - sar edx, FRACBITS - and edx, ebp ;; edx &= heightmask - movzx edx, byte [esi + edx] - movzx edx, byte [ebx + edx] - mov [edi], dl - - add edi, [vid + viddef_s.width] - sub ecx, 1 - jz .done - -.pairprepare: -;; -;; Prepare for the main loop. -;; - movd mm3, eax ;; Low dword = frac - movq mm4, mm3 ;; Copy to intermediate register - paddd mm4, mm2 ;; dwords of mm4 += fracstep - punpckldq mm3, mm4 ;; Low dword = first frac, high = second - pslld mm2, 1 ;; fracstep *= 2 - -;; -;; ebp = vid.width -;; - mov ebp, [vid + viddef_s.width] - - align 16 -.pairloop: - movq mm0, mm3 ;; 3B 1u. - psrad mm0, FRACBITS ;; 4B 1u. - pand mm0, mm1 ;; 3B 1u. frac &= heightmask - paddd mm3, mm2 ;; 3B 1u. frac += fracstep - - movd eax, mm0 ;; 3B 1u. Get first frac -;; IFETCH boundary - movzx eax, byte [esi + eax] ;; 4B 1u. Texture map - movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap - - punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword - movd edx, mm0 ;; 3B 1u. Get second frac - mov [edi], al ;; 2B 1(2)u. First pixel -;; IFETCH boundary - - movzx edx, byte [esi + edx] ;; 4B 1u. Texture map - movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap - mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel - - lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width -;; IFETCH boundary - sub ecx, 2 ;; 3B 1u. count -= 2 - jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop - - -.done: -;; -;; Clear MMX state, or else FPU operations will go badly awry. -;; - emms - - pop ebx - pop edi - pop esi - pop ebp - ret - -.usenonMMX: - call R_DrawColumn_8_ASM - jmp .done - - -;;---------------------------------------------------------------------- -;; -;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent -;; pixels. -;; -;; MMX column drawer. -;; -;;---------------------------------------------------------------------- -;; eax = accumulator -;; ebx = colormap -;; ecx = count -;; edx = accumulator -;; esi = source -;; edi = dest -;; ebp = vid.width -;; mm0 = accumulator -;; mm1 = heightmask, twice -;; mm2 = 2 * fracstep, twice -;; mm3 = pair of consecutive fracs -;;---------------------------------------------------------------------- - - -cglobal R_Draw2sMultiPatchColumn_8_MMX -R_Draw2sMultiPatchColumn_8_MMX: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx - -;; -;; Our algorithm requires that the texture height be a power of two. -;; If not, fall back to the non-MMX drawer. -;; -.texheightcheck: - mov edx, [dc_texheight] - sub edx, 1 ;; edx = heightmask - test edx, [dc_texheight] - jnz near .usenonMMX - - mov ebp, edx ;; Keep a copy of heightmask in a - ;; GPR for the time being. - -;; -;; Fill mm1 with heightmask -;; - movd mm1, edx ;; low dword = heightmask - punpckldq mm1, mm1 ;; copy low dword to high dword - -;; -;; dest = ylookup[dc_yl] + columnofs[dc_x]; -;; - mov eax, [dc_yl] - mov edi, [ylookup+eax*4] - mov ebx, [dc_x] - add edi, [columnofs+ebx*4] ;; edi = dest - - -;; -;; pixelcount = yh - yl + 1 -;; - mov ecx, [dc_yh] - add ecx, 1 - sub ecx, eax ;; pixel count - jle near .done ;; nothing to scale -;; -;; fracstep = dc_iscale; -;; - movd mm2, [dc_iscale] ;; fracstep in low dword - punpckldq mm2, mm2 ;; copy to high dword - - mov ebx, [dc_colormap] - mov esi, [dc_source] - -;; -;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep)); -;; - ;; eax == dc_yl already - shl eax, FRACBITS - sub eax, [centeryfrac] - imul dword [dc_iscale] - shrd eax, edx, FRACBITS - add eax, [dc_texturemid] - -;; -;; if (dc_hires) frac = 0; -;; - test byte [dc_hires], 0x01 - jz .mod2 - xor eax, eax - - -;; -;; Do mod-2 pixel. -;; -.mod2: - test ecx, 1 - jz .pairprepare - mov edx, eax ;; edx = frac - add eax, [dc_iscale] ;; eax += fracstep - sar edx, FRACBITS - and edx, ebp ;; edx &= heightmask - movzx edx, byte [esi + edx] - cmp dl, TRANSPARENTPIXEL - je .nextmod2 - movzx edx, byte [ebx + edx] - mov [edi], dl - -.nextmod2: - add edi, [vid + viddef_s.width] - sub ecx, 1 - jz .done - -.pairprepare: -;; -;; Prepare for the main loop. -;; - movd mm3, eax ;; Low dword = frac - movq mm4, mm3 ;; Copy to intermediate register - paddd mm4, mm2 ;; dwords of mm4 += fracstep - punpckldq mm3, mm4 ;; Low dword = first frac, high = second - pslld mm2, 1 ;; fracstep *= 2 - -;; -;; ebp = vid.width -;; - mov ebp, [vid + viddef_s.width] - - align 16 -.pairloop: - movq mm0, mm3 ;; 3B 1u. - psrad mm0, FRACBITS ;; 4B 1u. - pand mm0, mm1 ;; 3B 1u. frac &= heightmask - paddd mm3, mm2 ;; 3B 1u. frac += fracstep - - movd eax, mm0 ;; 3B 1u. Get first frac -;; IFETCH boundary - movzx eax, byte [esi + eax] ;; 4B 1u. Texture map - punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword - movd edx, mm0 ;; 3B 1u. Get second frac - cmp al, TRANSPARENTPIXEL ;; 2B 1u. - je .secondinpair ;; 2B 1u. -;; IFETCH boundary - movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap - mov [edi], al ;; 2B 1(2)u. First pixel - -.secondinpair: - movzx edx, byte [esi + edx] ;; 4B 1u. Texture map - cmp dl, TRANSPARENTPIXEL ;; 2B 1u. - je .nextpair ;; 2B 1u. -;; IFETCH boundary - movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap - mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel - -.nextpair: - lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width - sub ecx, 2 ;; 3B 1u. count -= 2 - jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop - - -.done: -;; -;; Clear MMX state, or else FPU operations will go badly awry. -;; - emms - - pop ebx - pop edi - pop esi - pop ebp - ret - -.usenonMMX: - call R_Draw2sMultiPatchColumn_8_ASM - jmp .done - - -;;---------------------------------------------------------------------- -;; -;; R_DrawSpan : 8bpp span drawer -;; -;; MMX span drawer. -;; -;;---------------------------------------------------------------------- -;; eax = accumulator -;; ebx = colormap -;; ecx = count -;; edx = accumulator -;; esi = source -;; edi = dest -;; ebp = two pixels -;; mm0 = accumulator -;; mm1 = xposition -;; mm2 = yposition -;; mm3 = 2 * xstep -;; mm4 = 2 * ystep -;; mm5 = nflatxshift -;; mm6 = nflatyshift -;; mm7 = accumulator -;;---------------------------------------------------------------------- - -cglobal R_DrawSpan_8_MMX -R_DrawSpan_8_MMX: - push ebp ;; preserve caller's stack frame pointer - push esi ;; preserve register variables - push edi - push ebx - -;; -;; esi = ds_source -;; ebx = ds_colormap -;; - mov esi, [ds_source] - mov ebx, [ds_colormap] - -;; -;; edi = ylookup[ds_y] + columnofs[ds_x1] -;; - mov eax, [ds_y] - mov edi, [ylookup + eax*4] - mov edx, [ds_x1] - add edi, [columnofs + edx*4] - -;; -;; ecx = ds_x2 - ds_x1 + 1 -;; - mov ecx, [ds_x2] - sub ecx, edx - add ecx, 1 - -;; -;; Needed for fracs and steps -;; - movd mm7, [nflatshiftup] - -;; -;; mm3 = xstep -;; - movd mm3, [ds_xstep] - pslld mm3, mm7 - punpckldq mm3, mm3 - -;; -;; mm4 = ystep -;; - movd mm4, [ds_ystep] - pslld mm4, mm7 - punpckldq mm4, mm4 - -;; -;; mm1 = pair of consecutive xpositions -;; - movd mm1, [ds_xfrac] - pslld mm1, mm7 - movq mm6, mm1 - paddd mm6, mm3 - punpckldq mm1, mm6 - -;; -;; mm2 = pair of consecutive ypositions -;; - movd mm2, [ds_yfrac] - pslld mm2, mm7 - movq mm6, mm2 - paddd mm6, mm4 - punpckldq mm2, mm6 - -;; -;; mm5 = nflatxshift -;; mm6 = nflatyshift -;; - movd mm5, [nflatxshift] - movd mm6, [nflatyshift] - -;; -;; Mask is in memory due to lack of registers. -;; - mov eax, [nflatmask] - mov [nflatmask64], eax - mov [nflatmask64 + 4], eax - - -;; -;; Go until we reach a dword boundary. -;; -.unaligned: - test edi, 3 - jz .alignedprep -.stragglers: - cmp ecx, 0 - je .done ;; If ecx == 0, we're finished. - -;; -;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift) -;; - movq mm0, mm1 ;; mm0 = xposition - movq mm7, mm2 ;; mm7 = yposition - paddd mm1, mm3 ;; xposition += xstep (once!) - paddd mm2, mm4 ;; yposition += ystep (once!) - psrld mm0, mm5 ;; shift - psrld mm7, mm6 ;; shift - pand mm7, [nflatmask64] ;; mask - por mm0, mm7 ;; or x and y together - - movd eax, mm0 ;; eax = index of first pixel - movzx eax, byte [esi + eax] ;; al = source[eax] - movzx eax, byte [ebx + eax] ;; al = colormap[al] - - mov [edi], al - add edi, 1 - - sub ecx, 1 - jmp .unaligned - - -.alignedprep: -;; -;; We can double the steps now. -;; - pslld mm3, 1 - pslld mm4, 1 - - -;; -;; Generate chunks of four pixels. -;; -.alignedloop: - -;; -;; Make sure we have at least four pixels. -;; - cmp ecx, 4 - jl .prestragglers - -;; -;; First two pixels. -;; - movq mm0, mm1 ;; mm0 = xposition - movq mm7, mm2 ;; mm7 = yposition - paddd mm1, mm3 ;; xposition += xstep - paddd mm2, mm4 ;; yposition += ystep - psrld mm0, mm5 ;; shift - psrld mm7, mm6 ;; shift - pand mm7, [nflatmask64] ;; mask - por mm0, mm7 ;; or x and y together - - movd eax, mm0 ;; eax = index of first pixel - movzx eax, byte [esi + eax] ;; al = source[eax] - movzx ebp, byte [ebx + eax] ;; ebp = colormap[al] - - punpckhdq mm0, mm0 ;; both dwords = high dword - movd eax, mm0 ;; eax = index of second pixel - movzx eax, byte [esi + eax] ;; al = source[eax] - movzx eax, byte [ebx + eax] ;; al = colormap[al] - shl eax, 8 ;; get pixel in right byte - or ebp, eax ;; put pixel in ebp - -;; -;; Next two pixels. -;; - movq mm0, mm1 ;; mm0 = xposition - movq mm7, mm2 ;; mm7 = yposition - paddd mm1, mm3 ;; xposition += xstep - paddd mm2, mm4 ;; yposition += ystep - psrld mm0, mm5 ;; shift - psrld mm7, mm6 ;; shift - pand mm7, [nflatmask64] ;; mask - por mm0, mm7 ;; or x and y together - - movd eax, mm0 ;; eax = index of third pixel - movzx eax, byte [esi + eax] ;; al = source[eax] - movzx eax, byte [ebx + eax] ;; al = colormap[al] - shl eax, 16 ;; get pixel in right byte - or ebp, eax ;; put pixel in ebp - - punpckhdq mm0, mm0 ;; both dwords = high dword - movd eax, mm0 ;; eax = index of second pixel - movzx eax, byte [esi + eax] ;; al = source[eax] - movzx eax, byte [ebx + eax] ;; al = colormap[al] - shl eax, 24 ;; get pixel in right byte - or ebp, eax ;; put pixel in ebp - -;; -;; Write pixels. -;; - mov [edi], ebp - add edi, 4 - - sub ecx, 4 - jmp .alignedloop - -.prestragglers: -;; -;; Back to one step at a time. -;; - psrad mm3, 1 - psrad mm4, 1 - jmp .stragglers - -.done: -;; -;; Clear MMX state, or else FPU operations will go badly awry. -;; - emms - - pop ebx - pop edi - pop esi - pop ebp - ret diff --git a/src/tmap_vc.nas b/src/tmap_vc.nas deleted file mode 100644 index c85cf70035f8588387420d479725242bb708cc42..0000000000000000000000000000000000000000 --- a/src/tmap_vc.nas +++ /dev/null @@ -1,48 +0,0 @@ -;; SONIC ROBO BLAST 2 -;;----------------------------------------------------------------------------- -;; Copyright (C) 1998-2000 by DooM Legacy Team. -;; Copyright (C) 1999-2023 by Sonic Team Junior. -;; -;; This program is free software distributed under the -;; terms of the GNU General Public License, version 2. -;; See the 'LICENSE' file for more details. -;;----------------------------------------------------------------------------- -;; FILE: -;; tmap_vc.nas -;; DESCRIPTION: -;; Assembler optimised math code for Visual C++. - - -[BITS 32] - -%macro cglobal 1 -%define %1 _%1 -[global %1] -%endmacro - -[SECTION .text write] - -;---------------------------------------------------------------------------- -;fixed_t FixedMul (fixed_t a, fixed_t b) -;---------------------------------------------------------------------------- -cglobal FixedMul -; align 16 -FixedMul: - mov eax,[esp+4] - imul dword [esp+8] - shrd eax,edx,16 - ret - -;---------------------------------------------------------------------------- -;fixed_t FixedDiv2 (fixed_t a, fixed_t b); -;---------------------------------------------------------------------------- -cglobal FixedDiv2 -; align 16 -FixedDiv2: - mov eax,[esp+4] - mov edx,eax ;; these two instructions allow the next - sar edx,31 ;; two to pair, on the Pentium processor. - shld edx,eax,16 - sal eax,16 - idiv dword [esp+8] - ret diff --git a/src/v_video.c b/src/v_video.c index 461a5e3bc7671684f5fdcc62a8c1a728ea913a55..3f958b286cdfdcc275a23c5822605812d218c242 100644 --- a/src/v_video.c +++ b/src/v_video.c @@ -447,12 +447,6 @@ static void CV_palette_OnChange(void) V_SetPalette(0); } -#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM) -void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes, - size_t destrowbytes); -#define HAVE_VIDCOPY -#endif - static void CV_constextsize_OnChange(void) { if (!con_refresh) @@ -466,9 +460,6 @@ static void CV_constextsize_OnChange(void) void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes, size_t destrowbytes) { -#ifdef HAVE_VIDCOPY - VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes); -#else if (srcrowbytes == destrowbytes) M_Memcpy(destptr, srcptr, srcrowbytes * height); else @@ -481,7 +472,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT3 srcptr += srcrowbytes; } } -#endif } static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0}; diff --git a/src/vid_copy.s b/src/vid_copy.s deleted file mode 100644 index 1473a3856f192145e3739738de85bd4f6cb96222..0000000000000000000000000000000000000000 --- a/src/vid_copy.s +++ /dev/null @@ -1,61 +0,0 @@ -// SONIC ROBO BLAST 2 -//----------------------------------------------------------------------------- -// Copyright (C) 1998-2000 by DooM Legacy Team. -// Copyright (C) 1999-2023 by Sonic Team Junior. -// -// This program is free software distributed under the -// terms of the GNU General Public License, version 2. -// See the 'LICENSE' file for more details. -//----------------------------------------------------------------------------- -/// \file vid_copy.s -/// \brief code for updating the linear frame buffer screen. - -#include "asm_defs.inc" // structures, must match the C structures! - -// DJGPPv2 is as fast as this one, but then someone may compile with a less -// good version of DJGPP than mine, so this little asm will do the trick! - -#define srcptr 4+16 -#define destptr 8+16 -#define width 12+16 -#define height 16+16 -#define srcrowbytes 20+16 -#define destrowbytes 24+16 - -// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth ); -// width is given as BYTES - -#ifdef __i386__ - -.globl C(VID_BlitLinearScreen_ASM) -C(VID_BlitLinearScreen_ASM): - pushl %ebp // preserve caller's stack frame - pushl %edi - pushl %esi // preserve register variables - pushl %ebx - - cld - movl srcptr(%esp),%esi - movl destptr(%esp),%edi - movl width(%esp),%ebx - movl srcrowbytes(%esp),%eax - subl %ebx,%eax - movl destrowbytes(%esp),%edx - subl %ebx,%edx - shrl $2,%ebx - movl height(%esp),%ebp -LLRowLoop: - movl %ebx,%ecx - rep/movsl (%esi),(%edi) - addl %eax,%esi - addl %edx,%edi - decl %ebp - jnz LLRowLoop - - popl %ebx // restore register variables - popl %esi - popl %edi - popl %ebp // restore the caller's stack frame - - ret -#endif diff --git a/tools/anglechk.c b/tools/anglechk.c index 4a67069bf744772082afeac5d8875991f2075903..7f56abff7e56336090af76e81335d76951dcec39 100644 --- a/tools/anglechk.c +++ b/tools/anglechk.c @@ -22,7 +22,6 @@ #ifdef _MSC_VER #include <assert.h> #endif -#define NOASM #include "../src/tables.h" #define NO_M #include "../src/m_fixed.c"