From 048e135c49c572013f5616a79439b3ffe690f5ad Mon Sep 17 00:00:00 2001
From: Lactozilla <jp6781615@gmail.com>
Date: Mon, 6 Nov 2023 19:27:38 -0300
Subject: [PATCH] Implement: R_DrawFloorSprite_8_RGBA,
 R_DrawTranslucentFloorSprite_8_RGBA, R_DrawTiltedFloorSprite_8_RGBA,
 R_DrawTiltedTranslucentFloorSprite_8_RGBA, R_DrawFloorSprite_NPO2_8_RGBA,
 R_DrawTranslucentFloorSprite_NPO2_8_RGBA,
 R_DrawTiltedFloorSprite_NPO2_8_RGBA,
 R_DrawTiltedTranslucentFloorSprite_NPO2_8_RGBA

---
 src/r_draw.h       |   1 -
 src/r_draw8_rgba.c | 803 ++++++++++++++++++++++++++++++++++++++-------
 src/screen.c       |   4 +-
 3 files changed, 685 insertions(+), 123 deletions(-)

diff --git a/src/r_draw.h b/src/r_draw.h
index 76894383d2..0a84ec59aa 100644
--- a/src/r_draw.h
+++ b/src/r_draw.h
@@ -182,7 +182,6 @@ void R_DrawFogColumn_8(void);
 void R_DrawColumnShadowed_8(void);
 
 void R_DrawColumn_8_RGBA(void);
-void R_DrawBlendedColumn_8_RGBA(void);
 void R_DrawTranslucentColumn_8_RGBA(void);
 
 #define PLANELIGHTFLOAT (BASEVIDWIDTH * BASEVIDWIDTH / vid.width / zeroheight / 21.0f * FIXED_TO_FLOAT(fovtan))
diff --git a/src/r_draw8_rgba.c b/src/r_draw8_rgba.c
index 53e768a505..bbe9a645a5 100644
--- a/src/r_draw8_rgba.c
+++ b/src/r_draw8_rgba.c
@@ -18,95 +18,21 @@
 // a has a constant z depth from top to bottom.
 //
 
-#include "v_video.h"
-
-void R_DrawColumn_8_RGBA(void)
-{
-	INT32 count = dc_yh - dc_yl;
-	if (count < 0) // Zero length, column does not exceed a pixel.
-		return;
-
-#ifdef RANGECHECK
-	if ((unsigned)dc_x >= (unsigned)vid.width || dc_yl < 0 || dc_yh >= vid.height)
-		return;
-#endif
-
-	UINT8 *dest = &topleft[dc_yl*vid.width + dc_x];
-
-	count++;
-
-	// Determine scaling, which is the only mapping to be done.
-	fixed_t fracstep = dc_iscale;
-	fixed_t frac = dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep);
-
-	// Inner loop that does the actual texture mapping, e.g. a DDA-like scaling.
-	// This is as fast as it gets.
-	{
-		const RGBA_t *source = (RGBA_t *)dc_source;
-		const lighttable_t *colormap = dc_colormap;
-		INT32 heightmask = dc_texheight-1;
-
-		RGBA_t ocolor, color;
-		UINT8 idx;
-
-		#define GET_COLOR(f) \
-			ocolor = source[f]; \
-			idx = colormap[GetColorLUT(&r_colorlookup, ocolor.s.red, ocolor.s.green, ocolor.s.blue)]; \
-			color = pMasterPalette[idx]
-
-		if (dc_texheight & heightmask)   // not a power of 2 -- killough
-		{
-			heightmask++;
-			heightmask <<= FRACBITS;
-
-			if (frac < 0)
-				while ((frac += heightmask) <  0);
-			else
-				while (frac >= heightmask)
-					frac -= heightmask;
-
-			do
-			{
-				// Re-map color indices from wall texture column
-				//  using a lighting/special effects LUT.
-				// heightmask is the Tutti-Frutti fix
-				GET_COLOR(frac>>FRACBITS);
-				*dest = colormap[GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue)];
-				dest += vid.width;
-
-				// Avoid overflow.
-				if (fracstep > 0x7FFFFFFF - frac)
-					frac += fracstep - heightmask;
-				else
-					frac += fracstep;
-
-				while (frac >= heightmask)
-					frac -= heightmask;
-			} while (--count);
-		}
-		else
-		{
-			while ((count -= 2) >= 0) // texture height is a power of 2
-			{
-				GET_COLOR((frac>>FRACBITS) & heightmask);
-				*dest = colormap[GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue)];
-				dest += vid.width;
-				frac += fracstep;
-				GET_COLOR((frac>>FRACBITS) & heightmask);
-				*dest = colormap[GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue)];
-				dest += vid.width;
-				frac += fracstep;
-			}
-			if (count & 1)
-			{
-				GET_COLOR((frac>>FRACBITS) & heightmask);
-				*dest = colormap[GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue)];
-			}
-		}
-
-		#undef GET_COLOR
-	}
-}
+#include "v_video.h" // for GetColorLUT (should be moved to somewhere else)
+
+#define GET_COLOR(f) /* expects locals source, colormap, dest, ocolor, idx, color in scope; leaves the blended result in color */ \
+	ocolor = source[f]; /* raw RGBA texel */ \
+	idx = colormap[GetColorLUT(&r_colorlookup, ocolor.s.red, ocolor.s.green, ocolor.s.blue)]; /* texel RGB -> LUT value -> colormap entry */ \
+	color = pMasterPalette[idx]; /* colormapped entry back to RGB */ \
+	color.s.alpha = ocolor.s.alpha; /* keep the texel's own alpha for the blend */ \
+	color.rgba = BlendPixel(pMasterPalette[*dest], color) // background is the palette colour of the pixel already at *dest
+
+#define GET_COLOR_BLENDED(f) /* same steps as GET_COLOR, but the final mix goes through ASTBlendPixel with dc_blendmode and dc_opacity */ \
+	ocolor = source[f]; /* raw RGBA texel */ \
+	idx = colormap[GetColorLUT(&r_colorlookup, ocolor.s.red, ocolor.s.green, ocolor.s.blue)]; /* texel RGB -> LUT value -> colormap entry */ \
+	color = pMasterPalette[idx]; /* colormapped entry back to RGB */ \
+	color.s.alpha = ocolor.s.alpha; /* keep the texel's own alpha for the blend */ \
+	color.rgba = ASTBlendPixel(pMasterPalette[*dest], color, dc_blendmode, dc_opacity) // NOTE(review): also expanded inside span drawers; confirm dc_blendmode/dc_opacity are set for them
 
 static UINT32 BlendPixel(RGBA_t background, RGBA_t foreground)
 {
@@ -118,7 +44,7 @@ static UINT32 BlendPixel(RGBA_t background, RGBA_t foreground)
 	return output.rgba;
 }
 
-void R_DrawBlendedColumn_8_RGBA(void)
+void R_DrawColumn_8_RGBA(void)
 {
 	INT32 count = dc_yh - dc_yl;
 	if (count < 0) // Zero length, column does not exceed a pixel.
@@ -147,13 +73,6 @@ void R_DrawBlendedColumn_8_RGBA(void)
 		RGBA_t ocolor, color;
 		UINT8 idx;
 
-		#define GET_COLOR(f) \
-			ocolor = source[f]; \
-			idx = colormap[GetColorLUT(&r_colorlookup, ocolor.s.red, ocolor.s.green, ocolor.s.blue)]; \
-			color = pMasterPalette[idx]; \
-			color.s.alpha = ocolor.s.alpha; \
-			color.rgba = BlendPixel(pMasterPalette[*dest], color)
-
 		if (dc_texheight & heightmask)   // not a power of 2 -- killough
 		{
 			heightmask++;
@@ -203,8 +122,6 @@ void R_DrawBlendedColumn_8_RGBA(void)
 				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
 			}
 		}
-
-		#undef GET_COLOR
 	}
 }
 
@@ -237,13 +154,6 @@ void R_DrawTranslucentColumn_8_RGBA(void)
 		RGBA_t ocolor, color;
 		UINT8 idx;
 
-		#define GET_COLOR(f) \
-			ocolor = source[f]; \
-			idx = colormap[GetColorLUT(&r_colorlookup, ocolor.s.red, ocolor.s.green, ocolor.s.blue)]; \
-			color = pMasterPalette[idx]; \
-			color.s.alpha = ocolor.s.alpha; \
-			color.rgba = ASTBlendPixel(pMasterPalette[*dest], color, dc_blendmode, dc_opacity)
-
 		if (dc_texheight & heightmask)   // not a power of 2 -- killough
 		{
 			heightmask++;
@@ -260,7 +170,7 @@ void R_DrawTranslucentColumn_8_RGBA(void)
 				// Re-map color indices from wall texture column
 				//  using a lighting/special effects LUT.
 				// heightmask is the Tutti-Frutti fix
-				GET_COLOR(frac >> FRACBITS);
+				GET_COLOR_BLENDED(frac >> FRACBITS);
 				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
 				dest += vid.width;
 
@@ -278,23 +188,21 @@ void R_DrawTranslucentColumn_8_RGBA(void)
 		{
 			while ((count -= 2) >= 0) // texture height is a power of 2
 			{
-				GET_COLOR((frac>>FRACBITS) & heightmask);
+				GET_COLOR_BLENDED((frac>>FRACBITS) & heightmask);
 				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
 				dest += vid.width;
 				frac += fracstep;
-				GET_COLOR((frac>>FRACBITS) & heightmask);
+				GET_COLOR_BLENDED((frac>>FRACBITS) & heightmask);
 				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
 				dest += vid.width;
 				frac += fracstep;
 			}
 			if (count & 1)
 			{
-				GET_COLOR((frac>>FRACBITS) & heightmask);
+				GET_COLOR_BLENDED((frac>>FRACBITS) & heightmask);
 				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
 			}
 		}
-
-		#undef GET_COLOR
 	}
 }
 
@@ -304,40 +212,695 @@ void R_DrawTranslucentColumn_8_RGBA(void)
 
 void R_DrawFloorSprite_8_RGBA(void)
 {
-	// TODO
+	fixed_t xposition; // Draws span ds_x1..ds_x2 of row ds_y from an RGBA source, blending each texel over the existing 8-bit pixel.
+	fixed_t yposition;
+	fixed_t xstep, ystep;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; // end of the vid.rowbytes * vid.height region starting at screens[0]
+
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR
+	UINT8 idx;
+
+	xposition = ds_xfrac << nflatshiftup;
+	yposition = ds_yfrac << nflatshiftup;
+	xstep = ds_xstep << nflatshiftup;
+	ystep = ds_ystep << nflatshiftup;
+
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+	dest = ylookup[ds_y] + columnofs[ds_x1];
+
+	while (count-- && dest < deststop) // deststop is one past the end, so it must never be written
+	{
+		GET_COLOR((((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)); // texel index: masked y bits OR'd with shifted x bits
+		*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue); // store the blended RGB as an 8-bit pixel
+		dest++;
+		xposition += xstep;
+		yposition += ystep;
+	}
 }
 
 void R_DrawTranslucentFloorSprite_8_RGBA(void)
 {
-	// TODO
+	fixed_t xposition; // Draws span ds_x1..ds_x2 of row ds_y from an RGBA source, mixing each texel via GET_COLOR_BLENDED (dc_blendmode/dc_opacity).
+	fixed_t yposition;
+	fixed_t xstep, ystep;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; // end of the vid.rowbytes * vid.height region starting at screens[0]
+
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR_BLENDED
+	UINT8 idx;
+
+	xposition = ds_xfrac << nflatshiftup;
+	yposition = ds_yfrac << nflatshiftup;
+	xstep = ds_xstep << nflatshiftup;
+	ystep = ds_ystep << nflatshiftup;
+
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+	dest = ylookup[ds_y] + columnofs[ds_x1];
+
+	while (count-- && dest < deststop) // deststop is one past the end, so it must never be written
+	{
+		GET_COLOR_BLENDED((((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)); // texel index: masked y bits OR'd with shifted x bits
+		*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue); // store the blended RGB as an 8-bit pixel
+		dest++;
+		xposition += xstep;
+		yposition += ystep;
+	}
 }
 
 void R_DrawTiltedFloorSprite_8_RGBA(void)
 {
-	// TODO
+	int width = ds_x2 - ds_x1; // Draws span ds_x1..ds_x2 of row ds_y with per-span perspective correction, blending via GET_COLOR.
+	double iz, uz, vz; // plane terms at the current x; u and v are recovered as uz/iz and vz/iz
+	UINT32 u, v;
+	int i;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+
+	double startz, startu, startv;
+	double izstep, uzstep, vzstep;
+	double endz, endu, endv;
+	UINT32 stepu, stepv;
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR
+	UINT8 idx;
+
+	iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); // evaluate the three plane vectors at (ds_x1, ds_y) relative to the view centre
+	uz = ds_sup->z + ds_sup->y*(centery-ds_y) + ds_sup->x*(ds_x1-centerx);
+	vz = ds_svp->z + ds_svp->y*(centery-ds_y) + ds_svp->x*(ds_x1-centerx);
+
+	dest = ylookup[ds_y] + columnofs[ds_x1]; // note: no end-of-buffer check is performed on dest in this function
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+
+	startz = 1.f/iz; // exact divide at the left edge of the first sub-span
+	startu = uz*startz;
+	startv = vz*startz;
+
+	izstep = ds_szp->x * SPANSIZE; // plane-term deltas for advancing SPANSIZE pixels at once
+	uzstep = ds_sup->x * SPANSIZE;
+	vzstep = ds_svp->x * SPANSIZE;
+	width++; // from here on width is the pixel count (x2 - x1 + 1)
+
+	while (width >= SPANSIZE) // full sub-spans: exact u,v at both ends, linear steps in between
+	{
+		iz += izstep;
+		uz += uzstep;
+		vz += vzstep;
+
+		endz = 1.f/iz;
+		endu = uz*endz;
+		endv = vz*endz;
+		stepu = (INT64)((endu - startu) * INVSPAN);
+		stepv = (INT64)((endv - startv) * INVSPAN);
+		u = (INT64)(startu);
+		v = (INT64)(startv);
+
+		for (i = SPANSIZE-1; i >= 0; i--)
+		{
+			GET_COLOR(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)); // texel index: masked v bits OR'd with shifted u bits
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+			dest++;
+
+			u += stepu;
+			v += stepv;
+		}
+		startu = endu; // next sub-span starts where this one ended
+		startv = endv;
+		width -= SPANSIZE;
+	}
+	if (width > 0) // leftover pixels, fewer than SPANSIZE
+	{
+		if (width == 1) // single pixel: no interpolation needed
+		{
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+			GET_COLOR(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift));
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+		}
+		else
+		{
+			double left = width;
+			iz += ds_szp->x * left; // advance the plane terms by the remaining pixel count
+			uz += ds_sup->x * left;
+			vz += ds_svp->x * left;
+
+			endz = 1.f/iz;
+			endu = uz*endz;
+			endv = vz*endz;
+			left = 1.f/left; // reuse left as the reciprocal for the per-pixel step
+			stepu = (INT64)((endu - startu) * left);
+			stepv = (INT64)((endv - startv) * left);
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			for (; width != 0; width--)
+			{
+				GET_COLOR(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift));
+				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+				dest++;
+
+				u += stepu;
+				v += stepv;
+			}
+		}
+	}
 }
 
 void R_DrawTiltedTranslucentFloorSprite_8_RGBA(void)
 {
-	// TODO
+	int width = ds_x2 - ds_x1; // Draws span ds_x1..ds_x2 of row ds_y with per-span perspective correction, mixing via GET_COLOR_BLENDED.
+	double iz, uz, vz; // plane terms at the current x; u and v are recovered as uz/iz and vz/iz
+	UINT32 u, v;
+	int i;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+
+	double startz, startu, startv;
+	double izstep, uzstep, vzstep;
+	double endz, endu, endv;
+	UINT32 stepu, stepv;
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR_BLENDED
+	UINT8 idx;
+
+	iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); // evaluate the three plane vectors at (ds_x1, ds_y) relative to the view centre
+	uz = ds_sup->z + ds_sup->y*(centery-ds_y) + ds_sup->x*(ds_x1-centerx);
+	vz = ds_svp->z + ds_svp->y*(centery-ds_y) + ds_svp->x*(ds_x1-centerx);
+
+	dest = ylookup[ds_y] + columnofs[ds_x1]; // note: no end-of-buffer check is performed on dest in this function
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+
+	startz = 1.f/iz; // exact divide at the left edge of the first sub-span
+	startu = uz*startz;
+	startv = vz*startz;
+
+	izstep = ds_szp->x * SPANSIZE; // plane-term deltas for advancing SPANSIZE pixels at once
+	uzstep = ds_sup->x * SPANSIZE;
+	vzstep = ds_svp->x * SPANSIZE;
+	width++; // from here on width is the pixel count (x2 - x1 + 1)
+
+	while (width >= SPANSIZE) // full sub-spans: exact u,v at both ends, linear steps in between
+	{
+		iz += izstep;
+		uz += uzstep;
+		vz += vzstep;
+
+		endz = 1.f/iz;
+		endu = uz*endz;
+		endv = vz*endz;
+		stepu = (INT64)((endu - startu) * INVSPAN);
+		stepv = (INT64)((endv - startv) * INVSPAN);
+		u = (INT64)(startu);
+		v = (INT64)(startv);
+
+		for (i = SPANSIZE-1; i >= 0; i--)
+		{
+			GET_COLOR_BLENDED(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)); // texel index: masked v bits OR'd with shifted u bits
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+			dest++;
+
+			u += stepu;
+			v += stepv;
+		}
+		startu = endu; // next sub-span starts where this one ended
+		startv = endv;
+		width -= SPANSIZE;
+	}
+	if (width > 0) // leftover pixels, fewer than SPANSIZE
+	{
+		if (width == 1) // single pixel: no interpolation needed
+		{
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+			GET_COLOR_BLENDED(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift));
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+		}
+		else
+		{
+			double left = width;
+			iz += ds_szp->x * left; // advance the plane terms by the remaining pixel count
+			uz += ds_sup->x * left;
+			vz += ds_svp->x * left;
+
+			endz = 1.f/iz;
+			endu = uz*endz;
+			endv = vz*endz;
+			left = 1.f/left; // reuse left as the reciprocal for the per-pixel step
+			stepu = (INT64)((endu - startu) * left);
+			stepv = (INT64)((endv - startv) * left);
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			for (; width != 0; width--)
+			{
+				GET_COLOR_BLENDED(((v >> nflatyshift) & nflatmask) | (u >> nflatxshift));
+				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+				dest++;
+
+				u += stepu;
+				v += stepv;
+			}
+		}
+	}
 }
 
+#if defined(__GNUC__) || defined(__clang__) // Suppress intentional libdivide compiler warnings - Also added to libdivide.h
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Waggregate-return"
+#endif
+
 void R_DrawFloorSprite_NPO2_8_RGBA(void)
 {
-	// TODO
+	fixed_t xposition; // Draws span ds_x1..ds_x2 of row ds_y from a ds_flatwidth x ds_flatheight RGBA source, blending via GET_COLOR.
+	fixed_t yposition;
+	fixed_t xstep, ystep;
+	fixed_t x, y;
+	fixed_t fixedwidth, fixedheight;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; // end of the vid.rowbytes * vid.height region starting at screens[0]
+
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR
+	UINT8 idx;
+
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
+
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+	dest = ylookup[ds_y] + columnofs[ds_x1];
+
+	fixedwidth = ds_flatwidth << FRACBITS;
+	fixedheight = ds_flatheight << FRACBITS;
+
+	// Fix xposition and yposition if they are out of bounds.
+	if (xposition < 0)
+		xposition = fixedwidth - 1 - ((UINT32)(-(xposition + 1)) % fixedwidth); // floor-modulo; stays below fixedwidth even for exact negative multiples
+	else if (xposition >= fixedwidth)
+		xposition %= fixedwidth;
+	if (yposition < 0)
+		yposition = fixedheight - 1 - ((UINT32)(-(yposition + 1)) % fixedheight); // floor-modulo; stays below fixedheight even for exact negative multiples
+	else if (yposition >= fixedheight)
+		yposition %= fixedheight;
+
+	while (count-- && dest < deststop) // deststop is one past the end, so it must never be written
+	{
+		// The loops here keep the texture coordinates within the texture.
+		// They will rarely iterate multiple times, and are cheaper than a modulo operation,
+		// even if using libdivide.
+		if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop
+			while (xposition < 0)
+				xposition += fixedwidth;
+		else
+			while (xposition >= fixedwidth)
+				xposition -= fixedwidth;
+		if (ystep < 0)
+			while (yposition < 0)
+				yposition += fixedheight;
+		else
+			while (yposition >= fixedheight)
+				yposition -= fixedheight;
+
+		x = (xposition >> FRACBITS);
+		y = (yposition >> FRACBITS);
+		GET_COLOR(((y * ds_flatwidth) + x)); // row-major texel index
+		*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue); // store the blended RGB as an 8-bit pixel
+		dest++;
+		xposition += xstep;
+		yposition += ystep;
+	}
 }
 
 void R_DrawTranslucentFloorSprite_NPO2_8_RGBA(void)
 {
-	// TODO
+	fixed_t xposition; // Draws span ds_x1..ds_x2 of row ds_y from a ds_flatwidth x ds_flatheight RGBA source, mixing via GET_COLOR_BLENDED.
+	fixed_t yposition;
+	fixed_t xstep, ystep;
+	fixed_t x, y;
+	fixed_t fixedwidth, fixedheight;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; // end of the vid.rowbytes * vid.height region starting at screens[0]
+
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR_BLENDED
+	UINT8 idx;
+
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
+
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+	dest = ylookup[ds_y] + columnofs[ds_x1];
+
+	fixedwidth = ds_flatwidth << FRACBITS;
+	fixedheight = ds_flatheight << FRACBITS;
+
+	// Fix xposition and yposition if they are out of bounds.
+	if (xposition < 0)
+		xposition = fixedwidth - 1 - ((UINT32)(-(xposition + 1)) % fixedwidth); // floor-modulo; stays below fixedwidth even for exact negative multiples
+	else if (xposition >= fixedwidth)
+		xposition %= fixedwidth;
+	if (yposition < 0)
+		yposition = fixedheight - 1 - ((UINT32)(-(yposition + 1)) % fixedheight); // floor-modulo; stays below fixedheight even for exact negative multiples
+	else if (yposition >= fixedheight)
+		yposition %= fixedheight;
+
+	while (count-- && dest < deststop) // deststop is one past the end, so it must never be written
+	{
+		// The loops here keep the texture coordinates within the texture.
+		// They will rarely iterate multiple times, and are cheaper than a modulo operation,
+		// even if using libdivide.
+		if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop
+			while (xposition < 0)
+				xposition += fixedwidth;
+		else
+			while (xposition >= fixedwidth)
+				xposition -= fixedwidth;
+		if (ystep < 0)
+			while (yposition < 0)
+				yposition += fixedheight;
+		else
+			while (yposition >= fixedheight)
+				yposition -= fixedheight;
+
+		x = (xposition >> FRACBITS);
+		y = (yposition >> FRACBITS);
+		GET_COLOR_BLENDED(((y * ds_flatwidth) + x)); // row-major texel index
+		*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue); // store the blended RGB as an 8-bit pixel
+		dest++;
+		xposition += xstep;
+		yposition += ystep;
+	}
 }
 
 void R_DrawTiltedFloorSprite_NPO2_8_RGBA(void)
 {
-	// TODO
+	int width = ds_x2 - ds_x1; // Draws span ds_x1..ds_x2 of row ds_y with per-span perspective correction from a ds_flatwidth x ds_flatheight RGBA source, blending via GET_COLOR.
+	double iz, uz, vz; // plane terms at the current x; u and v are recovered as uz/iz and vz/iz
+	UINT32 u, v;
+	int i;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+
+	double startz, startu, startv;
+	double izstep, uzstep, vzstep;
+	double endz, endu, endv;
+	UINT32 stepu, stepv;
+
+	struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth); // dividers built once per span and reused for every per-pixel wrap
+	struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR
+	UINT8 idx;
+
+	iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); // evaluate the three plane vectors at (ds_x1, ds_y) relative to the view centre
+	uz = ds_sup->z + ds_sup->y*(centery-ds_y) + ds_sup->x*(ds_x1-centerx);
+	vz = ds_svp->z + ds_svp->y*(centery-ds_y) + ds_svp->x*(ds_x1-centerx);
+
+	dest = ylookup[ds_y] + columnofs[ds_x1]; // note: no end-of-buffer check is performed on dest in this function
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+
+	startz = 1.f/iz; // exact divide at the left edge of the first sub-span
+	startu = uz*startz;
+	startv = vz*startz;
+
+	izstep = ds_szp->x * SPANSIZE; // plane-term deltas for advancing SPANSIZE pixels at once
+	uzstep = ds_sup->x * SPANSIZE;
+	vzstep = ds_svp->x * SPANSIZE;
+	width++; // from here on width is the pixel count (x2 - x1 + 1)
+
+	while (width >= SPANSIZE) // full sub-spans: exact u,v at both ends, linear steps in between
+	{
+		iz += izstep;
+		uz += uzstep;
+		vz += vzstep;
+
+		endz = 1.f/iz;
+		endu = uz*endz;
+		endv = vz*endz;
+		stepu = (INT64)((endu - startu) * INVSPAN);
+		stepv = (INT64)((endv - startv) * INVSPAN);
+		u = (INT64)(startu);
+		v = (INT64)(startv);
+
+		for (i = SPANSIZE-1; i >= 0; i--)
+		{
+			fixed_t x = (((fixed_t)u) >> FRACBITS); // integer texel coordinates, possibly negative or beyond the source size
+			fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+			if (x < 0) // negative: add enough whole widths to land in [0, ds_flatwidth)
+				x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+			else // non-negative: subtract floor(x / width) * width, i.e. x mod width
+				x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+			if (y < 0)
+				y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+			else
+				y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+			GET_COLOR(((y * ds_flatwidth) + x)); // row-major texel index
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+			dest++;
+
+			u += stepu;
+			v += stepv;
+		}
+		startu = endu; // next sub-span starts where this one ended
+		startv = endv;
+		width -= SPANSIZE;
+	}
+	if (width > 0) // leftover pixels, fewer than SPANSIZE
+	{
+		if (width == 1) // single pixel: no interpolation needed
+		{
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			fixed_t x = (((fixed_t)u) >> FRACBITS);
+			fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+			if (x < 0) // same wrap as in the full sub-span loop
+				x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+			else
+				x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+			if (y < 0)
+				y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+			else
+				y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+			GET_COLOR(((y * ds_flatwidth) + x));
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+		}
+		else
+		{
+			double left = width;
+			iz += ds_szp->x * left; // advance the plane terms by the remaining pixel count
+			uz += ds_sup->x * left;
+			vz += ds_svp->x * left;
+
+			endz = 1.f/iz;
+			endu = uz*endz;
+			endv = vz*endz;
+			left = 1.f/left; // reuse left as the reciprocal for the per-pixel step
+			stepu = (INT64)((endu - startu) * left);
+			stepv = (INT64)((endv - startv) * left);
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			for (; width != 0; width--)
+			{
+				fixed_t x = (((fixed_t)u) >> FRACBITS);
+				fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+				if (x < 0) // same wrap as in the full sub-span loop
+					x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+				else
+					x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+				if (y < 0)
+					y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+				else
+					y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+				GET_COLOR(((y * ds_flatwidth) + x));
+				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+				dest++;
+
+				u += stepu;
+				v += stepv;
+			}
+		}
+	}
 }
 
 void R_DrawTiltedTranslucentFloorSprite_NPO2_8_RGBA(void)
 {
-	// TODO
+	int width = ds_x2 - ds_x1; // Draws span ds_x1..ds_x2 of row ds_y with per-span perspective correction from a ds_flatwidth x ds_flatheight RGBA source, mixing via GET_COLOR_BLENDED.
+	double iz, uz, vz; // plane terms at the current x; u and v are recovered as uz/iz and vz/iz
+	UINT32 u, v;
+	int i;
+
+	RGBA_t *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+
+	double startz, startu, startv;
+	double izstep, uzstep, vzstep;
+	double endz, endu, endv;
+	UINT32 stepu, stepv;
+
+	struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth); // dividers built once per span and reused for every per-pixel wrap
+	struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
+
+	RGBA_t ocolor, color; // scratch locals written by GET_COLOR_BLENDED
+	UINT8 idx;
+
+	iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx); // evaluate the three plane vectors at (ds_x1, ds_y) relative to the view centre
+	uz = ds_sup->z + ds_sup->y*(centery-ds_y) + ds_sup->x*(ds_x1-centerx);
+	vz = ds_svp->z + ds_svp->y*(centery-ds_y) + ds_svp->x*(ds_x1-centerx);
+
+	dest = ylookup[ds_y] + columnofs[ds_x1]; // note: no end-of-buffer check is performed on dest in this function
+	source = (RGBA_t *)ds_source;
+	colormap = ds_colormap;
+
+	startz = 1.f/iz; // exact divide at the left edge of the first sub-span
+	startu = uz*startz;
+	startv = vz*startz;
+
+	izstep = ds_szp->x * SPANSIZE; // plane-term deltas for advancing SPANSIZE pixels at once
+	uzstep = ds_sup->x * SPANSIZE;
+	vzstep = ds_svp->x * SPANSIZE;
+	width++; // from here on width is the pixel count (x2 - x1 + 1)
+
+	while (width >= SPANSIZE) // full sub-spans: exact u,v at both ends, linear steps in between
+	{
+		iz += izstep;
+		uz += uzstep;
+		vz += vzstep;
+
+		endz = 1.f/iz;
+		endu = uz*endz;
+		endv = vz*endz;
+		stepu = (INT64)((endu - startu) * INVSPAN);
+		stepv = (INT64)((endv - startv) * INVSPAN);
+		u = (INT64)(startu);
+		v = (INT64)(startv);
+
+		for (i = SPANSIZE-1; i >= 0; i--)
+		{
+			fixed_t x = (((fixed_t)u) >> FRACBITS); // integer texel coordinates, possibly negative or beyond the source size
+			fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+			if (x < 0) // negative: add enough whole widths to land in [0, ds_flatwidth)
+				x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+			else // non-negative: subtract floor(x / width) * width, i.e. x mod width
+				x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+			if (y < 0)
+				y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+			else
+				y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+			GET_COLOR_BLENDED(((y * ds_flatwidth) + x)); // translucent variant: must use the blend-mode macro like the remainder paths below
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+			dest++;
+
+			u += stepu;
+			v += stepv;
+		}
+		startu = endu; // next sub-span starts where this one ended
+		startv = endv;
+		width -= SPANSIZE;
+	}
+	if (width > 0) // leftover pixels, fewer than SPANSIZE
+	{
+		if (width == 1) // single pixel: no interpolation needed
+		{
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			fixed_t x = (((fixed_t)u) >> FRACBITS);
+			fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+			if (x < 0) // same wrap as in the full sub-span loop
+				x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+			else
+				x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+			if (y < 0)
+				y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+			else
+				y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+			GET_COLOR_BLENDED(((y * ds_flatwidth) + x));
+			*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+		}
+		else
+		{
+			double left = width;
+			iz += ds_szp->x * left; // advance the plane terms by the remaining pixel count
+			uz += ds_sup->x * left;
+			vz += ds_svp->x * left;
+
+			endz = 1.f/iz;
+			endu = uz*endz;
+			endv = vz*endz;
+			left = 1.f/left; // reuse left as the reciprocal for the per-pixel step
+			stepu = (INT64)((endu - startu) * left);
+			stepv = (INT64)((endv - startv) * left);
+			u = (INT64)(startu);
+			v = (INT64)(startv);
+
+			for (; width != 0; width--)
+			{
+				fixed_t x = (((fixed_t)u) >> FRACBITS);
+				fixed_t y = (((fixed_t)v) >> FRACBITS);
+
+				if (x < 0) // same wrap as in the full sub-span loop
+					x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
+				else
+					x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
+				if (y < 0)
+					y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
+				else
+					y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
+
+				GET_COLOR_BLENDED(((y * ds_flatwidth) + x));
+				*dest = GetColorLUT(&r_colorlookup, color.s.red, color.s.green, color.s.blue);
+				dest++;
+
+				u += stepu;
+				v += stepv;
+			}
+		}
+	}
 }
+
+#if defined(__GNUC__) || defined(__clang__) // Stop suppressing intentional libdivide compiler warnings
+    #pragma GCC diagnostic pop
+#endif
diff --git a/src/screen.c b/src/screen.c
index dd601031b2..21817dc9fb 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -123,9 +123,9 @@ void SCR_SetDrawFuncs(void)
 	colfuncs[COLDRAWFUNC_TWOSMULTIPATCHTRANS] = R_Draw2sMultiPatchTranslucentColumn_8;
 	colfuncs[COLDRAWFUNC_FOG] = R_DrawFogColumn_8;
 
-	colfuncs_rgba[BASEDRAWFUNC] = R_DrawBlendedColumn_8_RGBA;
+	colfuncs_rgba[BASEDRAWFUNC] = R_DrawColumn_8_RGBA;
 	colfuncs_rgba[COLDRAWFUNC_TRANSLU] = R_DrawTranslucentColumn_8_RGBA;
-	colfuncs_rgba[COLDRAWFUNC_MAPPED] = R_DrawBlendedColumn_8_RGBA;
+	colfuncs_rgba[COLDRAWFUNC_MAPPED] = R_DrawColumn_8_RGBA;
 	colfuncs_rgba[COLDRAWFUNC_TRANSLU_MAPPED] = R_DrawTranslucentColumn_8_RGBA;
 
 	spanfuncs[SPANDRAWFUNC_TRANS] = R_DrawTranslucentSpan_8;
-- 
GitLab