diff --git a/src/doomdef.h b/src/doomdef.h
index 6e7db21432e24719c0c63eafc638637e31be114b..4a017436931be6f1cfa2d199ad7fbd6aa93f53fc 100644
--- a/src/doomdef.h
+++ b/src/doomdef.h
@@ -616,4 +616,8 @@ extern const char *compdate, *comptime, *comprevision, *compbranch;
 ///      	SRB2CB itself ported this from PrBoom+
 #define NEWCLIP
 
+#ifndef HAVE_PNG
+#define NO_PNG_LUMPS // without HAVE_PNG, code guarded by #ifndef NO_PNG_LUMPS is compiled out
+#endif
+
 #endif // __DOOMDEF__
diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c
index 6bc2c712e3f5c593d24aed10dd643c7134d76d50..c9a75a4f369007e05b3d1be0c0bade089719cf9d 100644
--- a/src/hardware/hw_cache.c
+++ b/src/hardware/hw_cache.c
@@ -30,6 +30,7 @@
 #include "../z_zone.h"
 #include "../v_video.h"
 #include "../r_draw.h"
+#include "../p_setup.h"
 
 //Hurdler: 25/04/2000: used for new colormap code in hardware mode
 //static UINT8 *gr_colormap = NULL; // by default it must be NULL ! (because colormap tables are not initialized)
@@ -420,6 +421,7 @@ static void HWR_DrawTexturePatchInCache(GLMipmap_t *mipmap,
 static void HWR_ResizeBlock(INT32 originalwidth, INT32 originalheight,
 	GrTexInfo *grInfo)
 {
+#ifdef GLIDE_API_COMPATIBILITY
 	//   Build the full textures from patches.
 	static const GrLOD_t gr_lods[9] =
 	{
@@ -456,6 +458,9 @@ static void HWR_ResizeBlock(INT32 originalwidth, INT32 originalheight,
 
 	INT32     j,k;
 	INT32     max,min;
+#else
+	(void)grInfo;
+#endif
 
 	// find a power of 2 width/height
 	if (cv_grrounddown.value)
@@ -511,6 +516,7 @@ static void HWR_ResizeBlock(INT32 originalwidth, INT32 originalheight,
 	}
 	else
 	{
+#ifdef GLIDE_API_COMPATIBILITY
 		//size up to nearest power of 2
 		blockwidth = 1;
 		while (blockwidth < originalwidth)
@@ -528,9 +534,14 @@ static void HWR_ResizeBlock(INT32 originalwidth, INT32 originalheight,
 		if (blockheight > 2048)
 			blockheight = 2048;
 			//I_Error("3D GenerateTexture : too big");
+#else
+		blockwidth = originalwidth;
+		blockheight = originalheight;
+#endif
 	}
 
 	// do the boring LOD stuff.. blech!
+#ifdef GLIDE_API_COMPATIBILITY
 	if (blockwidth >= blockheight)
 	{
 		max = blockwidth;
@@ -562,6 +573,7 @@ static void HWR_ResizeBlock(INT32 originalwidth, INT32 originalheight,
 	if (blockwidth < blockheight)
 		j += 4;
 	grInfo->aspectRatioLog2 = gr_aspects[j].aspect;
+#endif
 
 	blocksize = blockwidth * blockheight;
 
@@ -650,7 +662,12 @@ static void HWR_GenerateTexture(INT32 texnum, GLTexture_t *grtex)
 	// Composite the columns together.
 	for (i = 0, patch = texture->patches; i < texture->patchcount; i++, patch++)
 	{
+		size_t lumplength = W_LumpLengthPwad(patch->wad, patch->lump);
 		realpatch = W_CacheLumpNumPwad(patch->wad, patch->lump, PU_CACHE);
+#ifndef NO_PNG_LUMPS
+		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+#endif
 		HWR_DrawTexturePatchInCache(&grtex->mipmap,
 		                     blockwidth, blockheight,
 		                     texture, patch,
@@ -756,11 +773,13 @@ void HWR_MakePatch (const patch_t *patch, GLPatch_t *grPatch, GLMipmap_t *grMipm
 
 static size_t gr_numtextures;
 static GLTexture_t *gr_textures; // for ALL Doom textures
+static GLTexture_t *gr_textures2; // same element count as gr_textures; indexed by texture number in HWR_GetTextureFlat
 
 void HWR_InitTextureCache(void)
 {
 	gr_numtextures = 0;
 	gr_textures = NULL;
+	gr_textures2 = NULL; // nothing allocated yet; HWR_PrepLevelCache callocs both arrays
 }
 
 
@@ -799,7 +818,10 @@ void HWR_FreeTextureCache(void)
 	// texturecache info, we can free it
 	if (gr_textures)
 		free(gr_textures);
+	if (gr_textures2)
+		free(gr_textures2);
 	gr_textures = NULL;
+	gr_textures2 = NULL;
 	gr_numtextures = 0;
 }
 
@@ -817,6 +839,9 @@ void HWR_PrepLevelCache(size_t pnumtextures)
 	gr_textures = calloc(pnumtextures, sizeof (*gr_textures));
 	if (gr_textures == NULL)
 		I_Error("3D can't alloc gr_textures");
+	gr_textures2 = calloc(pnumtextures, sizeof (*gr_textures2));
+	if (gr_textures2 == NULL)
+		I_Error("3D can't alloc gr_textures2");
 }
 
 void HWR_SetPalette(RGBA_t *palette)
@@ -847,7 +872,7 @@ GLTexture_t *HWR_GetTexture(INT32 tex)
 	GLTexture_t *grtex;
 #ifdef PARANOIA
 	if ((unsigned)tex >= gr_numtextures)
-		I_Error(" HWR_GetTexture: tex >= numtextures\n");
+		I_Error("HWR_GetTexture: tex >= numtextures\n");
 #endif
 	grtex = &gr_textures[tex];
 
@@ -862,15 +887,39 @@ GLTexture_t *HWR_GetTexture(INT32 tex)
 	return grtex;
 }
 
+// HWR_RenderPlane and HWR_RenderPolyObjectPlane need this to get the flat dimensions from a patch.
+lumpnum_t gr_patchflat;
+
+// Sets grMipmap's size and data buffer from a flat lump that is stored as a patch.
+static void HWR_LoadPatchFlat(GLMipmap_t *grMipmap, lumpnum_t flatlumpnum)
+{
+	UINT8 *pixels;
+	patch_t *flatpatch = (patch_t *)W_CacheLumpNum(flatlumpnum, PU_STATIC); // NOTE(review): not released here - confirm who frees it
+	size_t lumpsize = W_LumpLength(flatlumpnum);
+#ifndef NO_PNG_LUMPS
+	if (R_IsLumpPNG((UINT8 *)flatpatch, lumpsize)) // use R_PNGToPatch's result in place of the raw lump
+		flatpatch = R_PNGToPatch((UINT8 *)flatpatch, lumpsize);
+#endif
+
+	grMipmap->width  = (UINT16)SHORT(flatpatch->width);
+	grMipmap->height = (UINT16)SHORT(flatpatch->height);
+
+	// Allocate the buffer, pre-fill it with TRANSPARENTPIXEL, then pass it to R_PatchToFlat.
+	pixels = Z_Malloc(grMipmap->width * grMipmap->height, PU_HWRCACHE, &grMipmap->grInfo.data);
+	memset(pixels, TRANSPARENTPIXEL, grMipmap->width * grMipmap->height);
+	R_PatchToFlat(flatpatch, pixels);
+}
 
 static void HWR_CacheFlat(GLMipmap_t *grMipmap, lumpnum_t flatlumpnum)
 {
 	size_t size, pflatsize;
 
 	// setup the texture info
+#ifdef GLIDE_API_COMPATIBILITY
 	grMipmap->grInfo.smallLodLog2 = GR_LOD_LOG2_64;
 	grMipmap->grInfo.largeLodLog2 = GR_LOD_LOG2_64;
 	grMipmap->grInfo.aspectRatioLog2 = GR_ASPECT_LOG2_1x1;
+#endif
 	grMipmap->grInfo.format = GR_TEXFMT_P_8;
 	grMipmap->flags = TF_WRAPXY|TF_CHROMAKEYED;
 
@@ -900,14 +949,19 @@ static void HWR_CacheFlat(GLMipmap_t *grMipmap, lumpnum_t flatlumpnum)
 			pflatsize = 64;
 			break;
 	}
-	grMipmap->width  = (UINT16)pflatsize;
-	grMipmap->height = (UINT16)pflatsize;
 
-	// the flat raw data needn't be converted with palettized textures
-	W_ReadLump(flatlumpnum, Z_Malloc(W_LumpLength(flatlumpnum),
-		PU_HWRCACHE, &grMipmap->grInfo.data));
-}
+	if (R_CheckIfPatch(flatlumpnum))
+		HWR_LoadPatchFlat(grMipmap, flatlumpnum);
+	else
+	{
+		grMipmap->width  = (UINT16)pflatsize;
+		grMipmap->height = (UINT16)pflatsize;
 
+		// the flat raw data needn't be converted with palettized textures
+		W_ReadLump(flatlumpnum, Z_Malloc(W_LumpLength(flatlumpnum),
+			PU_HWRCACHE, &grMipmap->grInfo.data));
+	}
+}
 
 // Download a Doom 'flat' to the hardware cache and make it ready for use
 void HWR_GetFlat(lumpnum_t flatlumpnum)
@@ -923,6 +977,52 @@ void HWR_GetFlat(lumpnum_t flatlumpnum)
 
 	// The system-memory data can be purged now.
 	Z_ChangeTag(grmip->grInfo.data, PU_HWRCACHE_UNLOCKED);
+
+	gr_patchflat = 0;
+	if (R_CheckIfPatch(flatlumpnum))
+		gr_patchflat = flatlumpnum;
+}
+
+// Sets up grMipmap (format, flags, size, data buffer) for texture `texturenum` used as a flat.
+static void HWR_LoadTextureFlat(GLMipmap_t *grMipmap, INT32 texturenum)
+{
+	UINT8 *pixels;
+#ifdef GLIDE_API_COMPATIBILITY
+	grMipmap->grInfo.aspectRatioLog2 = GR_ASPECT_LOG2_1x1;
+	grMipmap->grInfo.largeLodLog2 = GR_LOD_LOG2_64;
+	grMipmap->grInfo.smallLodLog2 = GR_LOD_LOG2_64;
+#endif
+	grMipmap->flags = TF_WRAPXY|TF_CHROMAKEYED;
+	grMipmap->grInfo.format = GR_TEXFMT_P_8;
+
+	// The mipmap takes the texture's own dimensions.
+	grMipmap->height = (UINT16)textures[texturenum]->height;
+	grMipmap->width  = (UINT16)textures[texturenum]->width;
+
+	// Allocate the buffer, pre-fill it with TRANSPARENTPIXEL, then pass it to R_TextureToFlat.
+	pixels = Z_Malloc(grMipmap->width * grMipmap->height, PU_HWRCACHE, &grMipmap->grInfo.data);
+	memset(pixels, TRANSPARENTPIXEL, grMipmap->width * grMipmap->height);
+	R_TextureToFlat(texturenum, pixels);
+}
+
+// Loads (if not cached) the flat built from texture `texturenum` and passes it to HWD.pfnSetTexture.
+void HWR_GetTextureFlat(INT32 texturenum)
+{
+	GLTexture_t *grtex;
+	// 0 and -1 are skipped; test them first so the PARANOIA range check cannot I_Error on -1.
+	if (texturenum == 0 || texturenum == -1)
+		return;
+#ifdef PARANOIA
+	if ((unsigned)texturenum >= gr_numtextures)
+		I_Error("HWR_GetTextureFlat: texturenum >= numtextures\n");
+#endif
+	grtex = &gr_textures2[texturenum];
+	if (!grtex->mipmap.grInfo.data && !grtex->mipmap.downloaded)
+		HWR_LoadTextureFlat(&grtex->mipmap, texturenum);
+	HWD.pfnSetTexture(&grtex->mipmap);
+
+	// The system-memory data can be purged now.
+	Z_ChangeTag(grtex->mipmap.grInfo.data, PU_HWRCACHE_UNLOCKED);
 }
 
 //
diff --git a/src/hardware/hw_glide.h b/src/hardware/hw_glide.h
index 2625d58640bbc26d62ea18ca156b8019aee2c9bf..bf91229efa1ccde01f73436079cc009a12aa62ac 100644
--- a/src/hardware/hw_glide.h
+++ b/src/hardware/hw_glide.h
@@ -59,9 +59,11 @@ typedef FxI32 GrTextureFormat_t;
 
 typedef struct
 {
+#ifdef GLIDE_API_COMPATIBILITY
 	GrLOD_t           smallLodLog2;
 	GrLOD_t           largeLodLog2;
 	GrAspectRatio_t   aspectRatioLog2;
+#endif // GLIDE_API_COMPATIBILITY
 	GrTextureFormat_t format;
 	void              *data;
 } GrTexInfo;
diff --git a/src/hardware/hw_glob.h b/src/hardware/hw_glob.h
index 9656e54e9b1d44df9bc921cbb544a03e8101a26d..c7b06edfd1f1455e5d399a0a51e58771fce88786 100644
--- a/src/hardware/hw_glob.h
+++ b/src/hardware/hw_glob.h
@@ -101,6 +101,7 @@ void HWR_FreeTextureCache(void);
 void HWR_FreeExtraSubsectors(void);
 
 void HWR_GetFlat(lumpnum_t flatlumpnum);
+void HWR_GetTextureFlat(INT32 texturenum);
 GLTexture_t *HWR_GetTexture(INT32 tex);
 void HWR_GetPatch(GLPatch_t *gpatch);
 void HWR_GetMappedPatch(GLPatch_t *gpatch, const UINT8 *colormap);
@@ -114,6 +115,8 @@ void HWR_GetFadeMask(lumpnum_t fademasklumpnum);
 // --------
 // hw_draw.c
 // --------
+extern lumpnum_t gr_patchflat;
+
 extern float gr_patch_scalex;
 extern float gr_patch_scaley;
 
diff --git a/src/hardware/hw_light.c b/src/hardware/hw_light.c
index edfe328b80dd126a16557e7f7d6eeaca75701ce4..1de20cad7a343a9311458aadbbaa5235e3f92a50 100644
--- a/src/hardware/hw_light.c
+++ b/src/hardware/hw_light.c
@@ -1225,9 +1225,11 @@ static void HWR_SetLight(void)
 		lightmappatch.height = 128;
 		lightmappatch.mipmap.width = 128;
 		lightmappatch.mipmap.height = 128;
+#ifdef GLIDE_API_COMPATIBILITY
 		lightmappatch.mipmap.grInfo.smallLodLog2 = GR_LOD_LOG2_128;
 		lightmappatch.mipmap.grInfo.largeLodLog2 = GR_LOD_LOG2_128;
 		lightmappatch.mipmap.grInfo.aspectRatioLog2 = GR_ASPECT_LOG2_1x1;
+#endif
 		lightmappatch.mipmap.flags = 0; //TF_WRAPXY; // DEBUG: view the overdraw !
 	}
 	HWD.pfnSetTexture(&lightmappatch.mipmap);
diff --git a/src/hardware/hw_main.c b/src/hardware/hw_main.c
index c6a8b16e568b24371af0dbd1da4e275c128444ad..07ae7ed2b21e202e27e2cb5c049e7ee7005de59b 100644
--- a/src/hardware/hw_main.c
+++ b/src/hardware/hw_main.c
@@ -70,9 +70,9 @@ static void HWR_ProjectPrecipitationSprite(precipmobj_t *thing);
 #endif
 
 #ifdef SORTING
-void HWR_AddTransparentFloor(lumpnum_t lumpnum, extrasubsector_t *xsub, boolean isceiling, fixed_t fixedheight,
+void HWR_AddTransparentFloor(lumpnum_t lumpnum, INT32 texturenum, extrasubsector_t *xsub, boolean isceiling, fixed_t fixedheight,
                              INT32 lightlevel, INT32 alpha, sector_t *FOFSector, FBITFIELD blend, boolean fogplane, extracolormap_t *planecolormap);
-void HWR_AddTransparentPolyobjectFloor(lumpnum_t lumpnum, polyobj_t *polysector, boolean isceiling, fixed_t fixedheight,
+void HWR_AddTransparentPolyobjectFloor(lumpnum_t lumpnum, INT32 texturenum, polyobj_t *polysector, boolean isceiling, fixed_t fixedheight,
                              INT32 lightlevel, INT32 alpha, sector_t *FOFSector, FBITFIELD blend, extracolormap_t *planecolormap);
 #else
 static void HWR_Add3DWater(lumpnum_t lumpnum, extrasubsector_t *xsub, fixed_t fixedheight,
@@ -522,7 +522,7 @@ static UINT8 HWR_FogBlockAlpha(INT32 light, UINT32 color) // Let's see if this c
 // HWR_RenderPlane  : Render a floor or ceiling convex polygon
 // -----------------+
 static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean isceiling, fixed_t fixedheight,
-                           FBITFIELD PolyFlags, INT32 lightlevel, lumpnum_t lumpnum, sector_t *FOFsector, UINT8 alpha, boolean fogplane, extracolormap_t *planecolormap)
+                           FBITFIELD PolyFlags, INT32 lightlevel, lumpnum_t lumpnum, INT32 texturenum, sector_t *FOFsector, UINT8 alpha, boolean fogplane, extracolormap_t *planecolormap)
 {
 	polyvertex_t *  pv;
 	float           height; //constant y for all points on the convex flat polygon
@@ -530,8 +530,9 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	INT32             nrPlaneVerts;   //verts original define of convex flat polygon
 	INT32             i;
 	float           flatxref,flatyref;
-	float fflatsize;
+	float fflatwidth, fflatheight;
 	INT32 flatflag;
+	boolean texflat = true;
 	size_t len;
 	float scrollx = 0.0f, scrolly = 0.0f;
 	angle_t angle = 0;
@@ -540,6 +541,7 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 #ifdef ESLOPE
 	pslope_t *slope = NULL;
 #endif
+	patch_t *patch;
 
 	static FOutVector *planeVerts = NULL;
 	static UINT16 numAllocedPlaneVerts = 0;
@@ -580,9 +582,10 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	if (nrPlaneVerts < 3)   //not even a triangle ?
 		return;
 
-	if (nrPlaneVerts > (INT32)UINT16_MAX) // FIXME: exceeds plVerts size
+	// This check is so inconsistent between functions, it hurts.
+	if (nrPlaneVerts > INT16_MAX) // FIXME: exceeds plVerts size
 	{
-		CONS_Debug(DBG_RENDER, "polygon size of %d exceeds max value of %d vertices\n", nrPlaneVerts, UINT16_MAX);
+		CONS_Debug(DBG_RENDER, "polygon size of %d exceeds max value of %d vertices\n", nrPlaneVerts, INT16_MAX);
 		return;
 	}
 
@@ -599,38 +602,47 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	switch (len)
 	{
 		case 4194304: // 2048x2048 lump
-			fflatsize = 2048.0f;
-			flatflag = 2047;
+			fflatwidth = fflatheight = 2048.0f;
 			break;
 		case 1048576: // 1024x1024 lump
-			fflatsize = 1024.0f;
-			flatflag = 1023;
+			fflatwidth = fflatheight = 1024.0f;
 			break;
 		case 262144:// 512x512 lump
-			fflatsize = 512.0f;
-			flatflag = 511;
+			fflatwidth = fflatheight = 512.0f;
 			break;
 		case 65536: // 256x256 lump
-			fflatsize = 256.0f;
-			flatflag = 255;
+			fflatwidth = fflatheight = 256.0f;
 			break;
 		case 16384: // 128x128 lump
-			fflatsize = 128.0f;
-			flatflag = 127;
+			fflatwidth = fflatheight = 128.0f;
 			break;
 		case 1024: // 32x32 lump
-			fflatsize = 32.0f;
-			flatflag = 31;
+			fflatwidth = fflatheight = 32.0f;
 			break;
 		default: // 64x64 lump
-			fflatsize = 64.0f;
-			flatflag = 63;
+			fflatwidth = fflatheight = 64.0f;
 			break;
 	}
 
+	flatflag = ((INT32)fflatwidth)-1;
+
+	if (texturenum != 0 && texturenum != -1)
+	{
+		fflatwidth = textures[texturenum]->width;
+		fflatheight = textures[texturenum]->height;
+	}
+	else if (gr_patchflat && R_CheckIfPatch(gr_patchflat))		// Just in case?
+	{
+		patch = (patch_t *)W_CacheLumpNum(gr_patchflat, PU_STATIC);
+		fflatwidth = SHORT(patch->width);
+		fflatheight = SHORT(patch->height);
+	}
+	else
+		texflat = false;
+
 	// reference point for flat texture coord for each vertex around the polygon
-	flatxref = (float)(((fixed_t)pv->x & (~flatflag)) / fflatsize);
-	flatyref = (float)(((fixed_t)pv->y & (~flatflag)) / fflatsize);
+	flatxref = (float)(((fixed_t)pv->x & (~flatflag)) / fflatwidth);
+	flatyref = (float)(((fixed_t)pv->y & (~flatflag)) / fflatheight);
 
 	// transform
 	v3d = planeVerts;
@@ -639,14 +651,14 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	{
 		if (!isceiling) // it's a floor
 		{
-			scrollx = FIXED_TO_FLOAT(FOFsector->floor_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(FOFsector->floor_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(FOFsector->floor_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(FOFsector->floor_yoffs)/fflatheight;
 			angle = FOFsector->floorpic_angle;
 		}
 		else // it's a ceiling
 		{
-			scrollx = FIXED_TO_FLOAT(FOFsector->ceiling_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(FOFsector->ceiling_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(FOFsector->ceiling_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(FOFsector->ceiling_yoffs)/fflatheight;
 			angle = FOFsector->ceilingpic_angle;
 		}
 	}
@@ -654,14 +666,14 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	{
 		if (!isceiling) // it's a floor
 		{
-			scrollx = FIXED_TO_FLOAT(gr_frontsector->floor_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(gr_frontsector->floor_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(gr_frontsector->floor_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(gr_frontsector->floor_yoffs)/fflatheight;
 			angle = gr_frontsector->floorpic_angle;
 		}
 		else // it's a ceiling
 		{
-			scrollx = FIXED_TO_FLOAT(gr_frontsector->ceiling_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(gr_frontsector->ceiling_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(gr_frontsector->ceiling_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(gr_frontsector->ceiling_yoffs)/fflatheight;
 			angle = gr_frontsector->ceilingpic_angle;
 		}
 	}
@@ -680,17 +692,24 @@ static void HWR_RenderPlane(sector_t *sector, extrasubsector_t *xsub, boolean is
 	for (i = 0; i < nrPlaneVerts; i++,v3d++,pv++)
 	{
 		// Hurdler: add scrolling texture on floor/ceiling
-		v3d->sow = (float)((pv->x / fflatsize) - flatxref + scrollx);
-		v3d->tow = (float)(-(pv->y / fflatsize) + flatyref + scrolly);
-
-		//v3d->sow = (float)(pv->x / fflatsize);
-		//v3d->tow = (float)(pv->y / fflatsize);
+		if (texflat)
+		{
+			v3d->sow = (float)(pv->x / fflatwidth) + scrollx;
+			v3d->tow = -(float)(pv->y / fflatheight) + scrolly;
+		}
+		else
+		{
+			v3d->sow = (float)((pv->x / fflatwidth) - flatxref + scrollx);
+			v3d->tow = (float)(flatyref - (pv->y / fflatheight) + scrolly);
+		}
 
 		// Need to rotate before translate
 		if (angle) // Only needs to be done if there's an altered angle
 		{
 			tempxsow = FLOAT_TO_FIXED(v3d->sow);
 			tempytow = FLOAT_TO_FIXED(v3d->tow);
+			if (texflat)
+				tempytow = -tempytow;
 			v3d->sow = (FIXED_TO_FLOAT(FixedMul(tempxsow, FINECOSINE(angle)) - FixedMul(tempytow, FINESINE(angle))));
 			v3d->tow = (FIXED_TO_FLOAT(FixedMul(tempxsow, FINESINE(angle)) + FixedMul(tempytow, FINECOSINE(angle))));
 		}
@@ -3164,21 +3183,23 @@ static inline void HWR_AddPolyObjectSegs(void)
 
 #ifdef POLYOBJECTS_PLANES
 static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling, fixed_t fixedheight,
-									FBITFIELD blendmode, UINT8 lightlevel, lumpnum_t lumpnum, sector_t *FOFsector,
+									FBITFIELD blendmode, UINT8 lightlevel, lumpnum_t lumpnum, INT32 texturenum, sector_t *FOFsector,
 									UINT8 alpha, extracolormap_t *planecolormap)
 {
 	float           height; //constant y for all points on the convex flat polygon
 	FOutVector      *v3d;
 	INT32             i;
 	float           flatxref,flatyref;
-	float fflatsize;
+	float fflatwidth, fflatheight;
 	INT32 flatflag;
+	boolean texflat = true;
 	size_t len;
 	float scrollx = 0.0f, scrolly = 0.0f;
 	angle_t angle = 0;
 	FSurfaceInfo    Surf;
 	fixed_t tempxsow, tempytow;
 	size_t nrPlaneVerts;
+	patch_t *patch;
 
 	static FOutVector *planeVerts = NULL;
 	static UINT16 numAllocedPlaneVerts = 0;
@@ -3209,38 +3230,47 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling,
 	switch (len)
 	{
 		case 4194304: // 2048x2048 lump
-			fflatsize = 2048.0f;
-			flatflag = 2047;
+			fflatwidth = fflatheight = 2048.0f;
 			break;
 		case 1048576: // 1024x1024 lump
-			fflatsize = 1024.0f;
-			flatflag = 1023;
+			fflatwidth = fflatheight = 1024.0f;
 			break;
 		case 262144:// 512x512 lump
-			fflatsize = 512.0f;
-			flatflag = 511;
+			fflatwidth = fflatheight = 512.0f;
 			break;
 		case 65536: // 256x256 lump
-			fflatsize = 256.0f;
-			flatflag = 255;
+			fflatwidth = fflatheight = 256.0f;
 			break;
 		case 16384: // 128x128 lump
-			fflatsize = 128.0f;
-			flatflag = 127;
+			fflatwidth = fflatheight = 128.0f;
 			break;
 		case 1024: // 32x32 lump
-			fflatsize = 32.0f;
-			flatflag = 31;
+			fflatwidth = fflatheight = 32.0f;
 			break;
 		default: // 64x64 lump
-			fflatsize = 64.0f;
-			flatflag = 63;
+			fflatwidth = fflatheight = 64.0f;
 			break;
 	}
 
+	flatflag = ((INT32)fflatwidth)-1;
+
+	if (texturenum != 0 && texturenum != -1)
+	{
+		fflatwidth = textures[texturenum]->width;
+		fflatheight = textures[texturenum]->height;
+	}
+	else if (gr_patchflat && R_CheckIfPatch(gr_patchflat))		// Just in case?
+	{
+		patch = (patch_t *)W_CacheLumpNum(gr_patchflat, PU_STATIC);
+		fflatwidth = SHORT(patch->width);
+		fflatheight = SHORT(patch->height);
+	}
+	else
+		texflat = false;
+
 	// reference point for flat texture coord for each vertex around the polygon
-	flatxref = (float)(((fixed_t)FIXED_TO_FLOAT(polysector->origVerts[0].x) & (~flatflag)) / fflatsize);
-	flatyref = (float)(((fixed_t)FIXED_TO_FLOAT(polysector->origVerts[0].y) & (~flatflag)) / fflatsize);
+	flatxref = (float)(((fixed_t)FIXED_TO_FLOAT(polysector->origVerts[0].x) & (~flatflag)) / fflatwidth); // mask in map units, not raw fixed_t
+	flatyref = (float)(((fixed_t)FIXED_TO_FLOAT(polysector->origVerts[0].y) & (~flatflag)) / fflatheight);
 
 	// transform
 	v3d = planeVerts;
@@ -3249,14 +3279,14 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling,
 	{
 		if (!isceiling) // it's a floor
 		{
-			scrollx = FIXED_TO_FLOAT(FOFsector->floor_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(FOFsector->floor_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(FOFsector->floor_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(FOFsector->floor_yoffs)/fflatheight;
 			angle = FOFsector->floorpic_angle>>ANGLETOFINESHIFT;
 		}
 		else // it's a ceiling
 		{
-			scrollx = FIXED_TO_FLOAT(FOFsector->ceiling_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(FOFsector->ceiling_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(FOFsector->ceiling_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(FOFsector->ceiling_yoffs)/fflatheight;
 			angle = FOFsector->ceilingpic_angle>>ANGLETOFINESHIFT;
 		}
 	}
@@ -3264,14 +3294,14 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling,
 	{
 		if (!isceiling) // it's a floor
 		{
-			scrollx = FIXED_TO_FLOAT(gr_frontsector->floor_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(gr_frontsector->floor_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(gr_frontsector->floor_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(gr_frontsector->floor_yoffs)/fflatheight;
 			angle = gr_frontsector->floorpic_angle>>ANGLETOFINESHIFT;
 		}
 		else // it's a ceiling
 		{
-			scrollx = FIXED_TO_FLOAT(gr_frontsector->ceiling_xoffs)/fflatsize;
-			scrolly = FIXED_TO_FLOAT(gr_frontsector->ceiling_yoffs)/fflatsize;
+			scrollx = FIXED_TO_FLOAT(gr_frontsector->ceiling_xoffs)/fflatwidth;
+			scrolly = FIXED_TO_FLOAT(gr_frontsector->ceiling_yoffs)/fflatheight;
 			angle = gr_frontsector->ceilingpic_angle>>ANGLETOFINESHIFT;
 		}
 	}
@@ -3294,15 +3324,26 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling,
 
 	for (i = 0; i < (INT32)nrPlaneVerts; i++,v3d++)
 	{
-		// Hurdler: add scrolling texture on floor/ceiling
-		v3d->sow = (float)((FIXED_TO_FLOAT(polysector->origVerts[i].x) / fflatsize) - flatxref + scrollx); // Go from the polysector's original vertex locations
-		v3d->tow = (float)(flatyref - (FIXED_TO_FLOAT(polysector->origVerts[i].y) / fflatsize) + scrolly); // Means the flat is offset based on the original vertex locations
+		// Go from the polysector's original vertex locations
+		// Means the flat is offset based on the original vertex locations
+		if (texflat)
+		{
+			v3d->sow = (float)(FIXED_TO_FLOAT(polysector->origVerts[i].x) / fflatwidth) + scrollx;
+			v3d->tow = -(float)(FIXED_TO_FLOAT(polysector->origVerts[i].y) / fflatheight) + scrolly;
+		}
+		else
+		{
+			v3d->sow = (float)((FIXED_TO_FLOAT(polysector->origVerts[i].x) / fflatwidth) - flatxref + scrollx);
+			v3d->tow = (float)(flatyref - (FIXED_TO_FLOAT(polysector->origVerts[i].y) / fflatheight) + scrolly);
+		}
 
 		// Need to rotate before translate
 		if (angle) // Only needs to be done if there's an altered angle
 		{
 			tempxsow = FLOAT_TO_FIXED(v3d->sow);
 			tempytow = FLOAT_TO_FIXED(v3d->tow);
+			if (texflat)
+				tempytow = -tempytow;
 			v3d->sow = (FIXED_TO_FLOAT(FixedMul(tempxsow, FINECOSINE(angle)) - FixedMul(tempytow, FINESINE(angle))));
 			v3d->tow = (FIXED_TO_FLOAT(-FixedMul(tempxsow, FINESINE(angle)) - FixedMul(tempytow, FINECOSINE(angle))));
 		}
@@ -3365,6 +3406,7 @@ static void HWR_AddPolyObjectPlanes(void)
 			else
 			{
 				HWR_GetFlat(levelflats[polyobjsector->floorpic].lumpnum);
+				HWR_GetTextureFlat(levelflats[polyobjsector->floorpic].texturenum);
 				HWR_RenderPolyObjectPlane(po_ptrs[i], false, polyobjsector->floorheight, PF_Occlude,
-										(light == -1 ? gr_frontsector->lightlevel : *gr_frontsector->lightlist[light].lightlevel), levelflats[polyobjsector->floorpic].lumpnum,
+										(light == -1 ? gr_frontsector->lightlevel : *gr_frontsector->lightlist[light].lightlevel), levelflats[polyobjsector->floorpic].lumpnum, levelflats[polyobjsector->floorpic].texturenum, // texturenum argument added to match the new signature
 										polyobjsector, 255, (light == -1 ? gr_frontsector->extra_colormap : *gr_frontsector->lightlist[light].extra_colormap));
@@ -3388,6 +3430,7 @@ static void HWR_AddPolyObjectPlanes(void)
 			else
 			{
 				HWR_GetFlat(levelflats[polyobjsector->ceilingpic].lumpnum);
+				HWR_GetTextureFlat(levelflats[polyobjsector->ceilingpic].texturenum);
 				HWR_RenderPolyObjectPlane(po_ptrs[i], true, polyobjsector->ceilingheight, PF_Occlude,
-				                          (light == -1 ? gr_frontsector->lightlevel : *gr_frontsector->lightlist[light].lightlevel), levelflats[polyobjsector->floorpic].lumpnum,
+				                          (light == -1 ? gr_frontsector->lightlevel : *gr_frontsector->lightlist[light].lightlevel), levelflats[polyobjsector->ceilingpic].lumpnum, levelflats[polyobjsector->ceilingpic].texturenum, // ceiling plane: ceilingpic for both lump and texture
 				                          polyobjsector, 255, (light == -1 ? gr_frontsector->extra_colormap : *gr_frontsector->lightlist[light].extra_colormap));
@@ -3541,11 +3584,12 @@ static void HWR_Subsector(size_t num)
 			if (sub->validcount != validcount)
 			{
 				HWR_GetFlat(levelflats[gr_frontsector->floorpic].lumpnum);
+				HWR_GetTextureFlat(levelflats[gr_frontsector->floorpic].texturenum);
 				HWR_RenderPlane(gr_frontsector, &extrasubsectors[num], false,
 					// Hack to make things continue to work around slopes.
 					locFloorHeight == cullFloorHeight ? locFloorHeight : gr_frontsector->floorheight,
 					// We now return you to your regularly scheduled rendering.
-					PF_Occlude, floorlightlevel, levelflats[gr_frontsector->floorpic].lumpnum, NULL, 255, false, floorcolormap);
+					PF_Occlude, floorlightlevel, levelflats[gr_frontsector->floorpic].lumpnum, levelflats[gr_frontsector->floorpic].texturenum, NULL, 255, false, floorcolormap);
 			}
 		}
 		else
@@ -3563,11 +3607,12 @@ static void HWR_Subsector(size_t num)
 			if (sub->validcount != validcount)
 			{
 				HWR_GetFlat(levelflats[gr_frontsector->ceilingpic].lumpnum);
+				HWR_GetTextureFlat(levelflats[gr_frontsector->ceilingpic].texturenum);
 				HWR_RenderPlane(NULL, &extrasubsectors[num], true,
 					// Hack to make things continue to work around slopes.
 					locCeilingHeight == cullCeilingHeight ? locCeilingHeight : gr_frontsector->ceilingheight,
 					// We now return you to your regularly scheduled rendering.
-					PF_Occlude, ceilinglightlevel, levelflats[gr_frontsector->ceilingpic].lumpnum,NULL, 255, false, ceilingcolormap);
+					PF_Occlude, ceilinglightlevel, levelflats[gr_frontsector->ceilingpic].lumpnum, levelflats[gr_frontsector->ceilingpic].texturenum, NULL, 255, false, ceilingcolormap);
 			}
 		}
 		else
@@ -3626,7 +3671,7 @@ static void HWR_Subsector(size_t num)
 					else
 						alpha = HWR_FogBlockAlpha(*gr_frontsector->lightlist[light].lightlevel, NORMALFOG);
 
-					HWR_AddTransparentFloor(0,
+					HWR_AddTransparentFloor(0, 0,
 					                       &extrasubsectors[num],
 										   false,
 					                       *rover->bottomheight,
@@ -3645,6 +3690,7 @@ static void HWR_Subsector(size_t num)
 					               rover->alpha-1, rover->master->frontsector);
 #else
 					HWR_AddTransparentFloor(levelflats[*rover->bottompic].lumpnum,
+											levelflats[*rover->bottompic].texturenum,
 					                       &extrasubsectors[num],
 										   false,
 					                       *rover->bottomheight,
@@ -3656,8 +3702,9 @@ static void HWR_Subsector(size_t num)
 				else
 				{
 					HWR_GetFlat(levelflats[*rover->bottompic].lumpnum);
+					HWR_GetTextureFlat(levelflats[*rover->bottompic].texturenum);
 					light = R_GetPlaneLight(gr_frontsector, centerHeight, dup_viewz < cullHeight ? true : false);
-					HWR_RenderPlane(NULL, &extrasubsectors[num], false, *rover->bottomheight, PF_Occlude, *gr_frontsector->lightlist[light].lightlevel, levelflats[*rover->bottompic].lumpnum,
+					HWR_RenderPlane(NULL, &extrasubsectors[num], false, *rover->bottomheight, PF_Occlude, *gr_frontsector->lightlist[light].lightlevel, levelflats[*rover->bottompic].lumpnum, levelflats[*rover->bottompic].texturenum,
 					                rover->master->frontsector, 255, false, *gr_frontsector->lightlist[light].extra_colormap);
 				}
 			}
@@ -3689,7 +3736,7 @@ static void HWR_Subsector(size_t num)
 					else
 						alpha = HWR_FogBlockAlpha(*gr_frontsector->lightlist[light].lightlevel, NORMALFOG);
 
-					HWR_AddTransparentFloor(0,
+					HWR_AddTransparentFloor(0, 0,
 					                       &extrasubsectors[num],
 										   true,
 					                       *rover->topheight,
@@ -3708,6 +3755,7 @@ static void HWR_Subsector(size_t num)
 					                          rover->alpha-1, rover->master->frontsector);
 #else
 					HWR_AddTransparentFloor(levelflats[*rover->toppic].lumpnum,
+											levelflats[*rover->toppic].texturenum, // top plane: same levelflat as the lumpnum above
 					                        &extrasubsectors[num],
 											true,
 					                        *rover->topheight,
@@ -3720,8 +3768,9 @@ static void HWR_Subsector(size_t num)
 				else
 				{
 					HWR_GetFlat(levelflats[*rover->toppic].lumpnum);
+					HWR_GetTextureFlat(levelflats[*rover->toppic].texturenum);
 					light = R_GetPlaneLight(gr_frontsector, centerHeight, dup_viewz < cullHeight ? true : false);
-					HWR_RenderPlane(NULL, &extrasubsectors[num], true, *rover->topheight, PF_Occlude, *gr_frontsector->lightlist[light].lightlevel, levelflats[*rover->toppic].lumpnum,
+					HWR_RenderPlane(NULL, &extrasubsectors[num], true, *rover->topheight, PF_Occlude, *gr_frontsector->lightlist[light].lightlevel, levelflats[*rover->toppic].lumpnum, levelflats[*rover->toppic].texturenum,
 					                  rover->master->frontsector, 255, false, *gr_frontsector->lightlist[light].extra_colormap);
 				}
 			}
@@ -5050,6 +5099,7 @@ typedef struct
 	fixed_t fixedheight;
 	INT32 lightlevel;
 	lumpnum_t lumpnum;
+	INT32 texturenum;
 	INT32 alpha;
 	sector_t *FOFSector;
 	FBITFIELD blend;
@@ -5068,6 +5118,7 @@ typedef struct
 	fixed_t fixedheight;
 	INT32 lightlevel;
 	lumpnum_t lumpnum;
+	INT32 texturenum;
 	INT32 alpha;
 	sector_t *FOFSector;
 	FBITFIELD blend;
@@ -5098,7 +5149,7 @@ static INT32 drawcount = 0;
 #define MAX_TRANSPARENTFLOOR 512
 
 // This will likely turn into a copy of HWR_Add3DWater and replace it.
-void HWR_AddTransparentFloor(lumpnum_t lumpnum, extrasubsector_t *xsub, boolean isceiling,
+void HWR_AddTransparentFloor(lumpnum_t lumpnum, INT32 texturenum, extrasubsector_t *xsub, boolean isceiling,
 	fixed_t fixedheight, INT32 lightlevel, INT32 alpha, sector_t *FOFSector, FBITFIELD blend, boolean fogplane, extracolormap_t *planecolormap)
 {
 	static size_t allocedplanes = 0;
@@ -5117,6 +5168,7 @@ void HWR_AddTransparentFloor(lumpnum_t lumpnum, extrasubsector_t *xsub, boolean
 	planeinfo[numplanes].fixedheight = fixedheight;
 	planeinfo[numplanes].lightlevel = lightlevel;
 	planeinfo[numplanes].lumpnum = lumpnum;
+	planeinfo[numplanes].texturenum = texturenum;
 	planeinfo[numplanes].xsub = xsub;
 	planeinfo[numplanes].alpha = alpha;
 	planeinfo[numplanes].FOFSector = FOFSector;
@@ -5130,7 +5182,7 @@ void HWR_AddTransparentFloor(lumpnum_t lumpnum, extrasubsector_t *xsub, boolean
 
 // Adding this for now until I can create extrasubsector info for polyobjects
 // When that happens it'll just be done through HWR_AddTransparentFloor and HWR_RenderPlane
-void HWR_AddTransparentPolyobjectFloor(lumpnum_t lumpnum, polyobj_t *polysector, boolean isceiling,
+void HWR_AddTransparentPolyobjectFloor(lumpnum_t lumpnum, INT32 texturenum, polyobj_t *polysector, boolean isceiling,
 	fixed_t fixedheight, INT32 lightlevel, INT32 alpha, sector_t *FOFSector, FBITFIELD blend, extracolormap_t *planecolormap)
 {
 	static size_t allocedpolyplanes = 0;
@@ -5149,6 +5201,7 @@ void HWR_AddTransparentPolyobjectFloor(lumpnum_t lumpnum, polyobj_t *polysector,
 	polyplaneinfo[numpolyplanes].fixedheight = fixedheight;
 	polyplaneinfo[numpolyplanes].lightlevel = lightlevel;
 	polyplaneinfo[numpolyplanes].lumpnum = lumpnum;
+	polyplaneinfo[numpolyplanes].texturenum = texturenum;
 	polyplaneinfo[numpolyplanes].polysector = polysector;
 	polyplaneinfo[numpolyplanes].alpha = alpha;
 	polyplaneinfo[numpolyplanes].FOFSector = FOFSector;
@@ -5310,9 +5363,12 @@ static void HWR_CreateDrawNodes(void)
 			gr_frontsector = NULL;
 
 			if (!(sortnode[sortindex[i]].plane->blend & PF_NoTexture))
+			{
 				HWR_GetFlat(sortnode[sortindex[i]].plane->lumpnum);
+				HWR_GetTextureFlat(sortnode[sortindex[i]].plane->texturenum);
+			}
 			HWR_RenderPlane(NULL, sortnode[sortindex[i]].plane->xsub, sortnode[sortindex[i]].plane->isceiling, sortnode[sortindex[i]].plane->fixedheight, sortnode[sortindex[i]].plane->blend, sortnode[sortindex[i]].plane->lightlevel,
-				sortnode[sortindex[i]].plane->lumpnum, sortnode[sortindex[i]].plane->FOFSector, sortnode[sortindex[i]].plane->alpha, sortnode[sortindex[i]].plane->fogplane, sortnode[sortindex[i]].plane->planecolormap);
+				sortnode[sortindex[i]].plane->lumpnum, sortnode[sortindex[i]].plane->texturenum, sortnode[sortindex[i]].plane->FOFSector, sortnode[sortindex[i]].plane->alpha, sortnode[sortindex[i]].plane->fogplane, sortnode[sortindex[i]].plane->planecolormap);
 		}
 		else if (sortnode[sortindex[i]].polyplane)
 		{
@@ -5320,9 +5376,12 @@ static void HWR_CreateDrawNodes(void)
 			gr_frontsector = NULL;
 
 			if (!(sortnode[sortindex[i]].polyplane->blend & PF_NoTexture))
+			{
 				HWR_GetFlat(sortnode[sortindex[i]].polyplane->lumpnum);
+				HWR_GetTextureFlat(sortnode[sortindex[i]].polyplane->texturenum);
+			}
 			HWR_RenderPolyObjectPlane(sortnode[sortindex[i]].polyplane->polysector, sortnode[sortindex[i]].polyplane->isceiling, sortnode[sortindex[i]].polyplane->fixedheight, sortnode[sortindex[i]].polyplane->blend, sortnode[sortindex[i]].polyplane->lightlevel,
-				sortnode[sortindex[i]].polyplane->lumpnum, sortnode[sortindex[i]].polyplane->FOFSector, sortnode[sortindex[i]].polyplane->alpha, sortnode[sortindex[i]].polyplane->planecolormap);
+				sortnode[sortindex[i]].polyplane->lumpnum, sortnode[sortindex[i]].polyplane->texturenum, sortnode[sortindex[i]].polyplane->FOFSector, sortnode[sortindex[i]].polyplane->alpha, sortnode[sortindex[i]].polyplane->planecolormap);
 		}
 		else if (sortnode[sortindex[i]].wall)
 		{
diff --git a/src/hardware/hw_md2.c b/src/hardware/hw_md2.c
index d4728315ad88f97e5f1f16ac16ed037a8f7376ce..7b6367cf33fba40174d08ab75fd3925dae33f2a0 100644
--- a/src/hardware/hw_md2.c
+++ b/src/hardware/hw_md2.c
@@ -747,10 +747,12 @@ static void md2_loadTexture(md2_t *model)
 		grpatch->mipmap.width = (UINT16)w;
 		grpatch->mipmap.height = (UINT16)h;
 
+#ifdef GLIDE_API_COMPATIBILITY
 		// not correct!
 		grpatch->mipmap.grInfo.smallLodLog2 = GR_LOD_LOG2_256;
 		grpatch->mipmap.grInfo.largeLodLog2 = GR_LOD_LOG2_256;
 		grpatch->mipmap.grInfo.aspectRatioLog2 = GR_ASPECT_LOG2_1x1;
+#endif
 	}
 	HWD.pfnSetTexture(&grpatch->mipmap);
 	HWR_UnlockCachedPatch(grpatch);
@@ -798,10 +800,12 @@ static void md2_loadBlendTexture(md2_t *model)
 		grpatch->mipmap.width = (UINT16)w;
 		grpatch->mipmap.height = (UINT16)h;
 
+#ifdef GLIDE_API_COMPATIBILITY
 		// not correct!
 		grpatch->mipmap.grInfo.smallLodLog2 = GR_LOD_LOG2_256;
 		grpatch->mipmap.grInfo.largeLodLog2 = GR_LOD_LOG2_256;
 		grpatch->mipmap.grInfo.aspectRatioLog2 = GR_ASPECT_LOG2_1x1;
+#endif
 	}
 	HWD.pfnSetTexture(&grpatch->mipmap); // We do need to do this so that it can be cleared and knows to recreate it when necessary
 	HWR_UnlockCachedPatch(grpatch);
diff --git a/src/p_setup.c b/src/p_setup.c
index 65335be3f5239acbf45f1431c4f06522ef0343db..60e036a872824793a18d2a1fb554e8f25f377a0b 100644
--- a/src/p_setup.c
+++ b/src/p_setup.c
@@ -573,6 +573,11 @@ INT32 P_AddLevelFlat(const char *flatname, levelflat_t *levelflat)
 
 		// store the flat lump number
 		levelflat->lumpnum = R_GetFlatNumForName(flatname);
+		levelflat->texturenum = R_CheckTextureNumForName(flatname);
+		levelflat->lasttexturenum = levelflat->texturenum;
+
+		levelflat->baselumpnum = LUMPERROR;
+		levelflat->basetexturenum = -1;
 
 #ifndef ZDEBUG
 		CONS_Debug(DBG_SETUP, "flat #%03d: %s\n", atoi(sizeu1(numlevelflats)), levelflat->name);
@@ -617,6 +622,11 @@ INT32 P_AddLevelFlatRuntime(const char *flatname)
 
 		// store the flat lump number
 		levelflat->lumpnum = R_GetFlatNumForName(flatname);
+		levelflat->texturenum = R_CheckTextureNumForName(flatname);
+		levelflat->lasttexturenum = levelflat->texturenum;
+
+		levelflat->baselumpnum = LUMPERROR;
+		levelflat->basetexturenum = -1;
 
 #ifndef ZDEBUG
 		CONS_Debug(DBG_SETUP, "flat #%03d: %s\n", atoi(sizeu1(numlevelflats)), levelflat->name);
diff --git a/src/p_setup.h b/src/p_setup.h
index 7e8a5d7e6bd31185d48c936eb95e7b33cf847413..7e3a149eb882965e436dbc4ff4327d0fcefa8a00 100644
--- a/src/p_setup.h
+++ b/src/p_setup.h
@@ -37,12 +37,19 @@ typedef struct
 {
 	char name[9]; // resource name from wad
 	lumpnum_t lumpnum; // lump number of the flat
+	INT32 texturenum, lasttexturenum; // texture number of the flat
+	UINT16 width, height;
+	fixed_t topoffset, leftoffset;
 
 	// for flat animation
 	lumpnum_t baselumpnum;
+	INT32 basetexturenum;
 	INT32 animseq; // start pos. in the anim sequence
 	INT32 numpics;
 	INT32 speed;
+
+	// for patchflats
+	UINT8 *flatpatch;
 } levelflat_t;
 
 extern size_t numlevelflats;
diff --git a/src/p_spec.c b/src/p_spec.c
index 7742554cd7a7dbcec6d9e201083ce95cc8cfa112..256ca3453f0017311392d0b7d7d3043f7fe576a6 100644
--- a/src/p_spec.c
+++ b/src/p_spec.c
@@ -205,8 +205,8 @@ void P_InitPicAnims(void)
 			if ((W_CheckNumForName(animdefs[i].startname)) == LUMPERROR)
 				continue;
 
-			lastanim->picnum = R_FlatNumForName(animdefs[i].endname);
-			lastanim->basepic = R_FlatNumForName(animdefs[i].startname);
+			lastanim->picnum = R_GetFlatNumForName(animdefs[i].endname);
+			lastanim->basepic = R_GetFlatNumForName(animdefs[i].startname);
 		}
 
 		lastanim->istexture = animdefs[i].istexture;
@@ -464,7 +464,19 @@ static inline void P_FindAnimatedFlat(INT32 animnum)
 	for (i = 0; i < numlevelflats; i++, foundflats++)
 	{
 		// is that levelflat from the flat anim sequence ?
-		if (foundflats->lumpnum >= startflatnum && foundflats->lumpnum <= endflatnum)
+		if ((anims[animnum].istexture) && (foundflats->texturenum != 0 && foundflats->texturenum != -1)
+			&& ((UINT16)foundflats->texturenum >= startflatnum && (UINT16)foundflats->texturenum <= endflatnum))
+		{
+			foundflats->basetexturenum = startflatnum;
+			foundflats->animseq = foundflats->texturenum - startflatnum;
+			foundflats->numpics = endflatnum - startflatnum + 1;
+			foundflats->speed = anims[animnum].speed;
+
+			CONS_Debug(DBG_SETUP, "animflat: #%03d name:%.8s animseq:%d numpics:%d speed:%d\n",
+					atoi(sizeu1(i)), foundflats->name, foundflats->animseq,
+					foundflats->numpics,foundflats->speed);
+		}
+		else if (foundflats->lumpnum >= startflatnum && foundflats->lumpnum <= endflatnum)
 		{
 			foundflats->baselumpnum = startflatnum;
 			foundflats->animseq = foundflats->lumpnum - startflatnum;
@@ -488,10 +500,7 @@ void P_SetupLevelFlatAnims(void)
 
 	// the original game flat anim sequences
 	for (i = 0; anims[i].istexture != -1; i++)
-	{
-		if (!anims[i].istexture)
-			P_FindAnimatedFlat(i);
-	}
+		P_FindAnimatedFlat(i);
 }
 
 //
@@ -5669,9 +5678,12 @@ void P_UpdateSpecials(void)
 	{
 		if (foundflats->speed) // it is an animated flat
 		{
+			// update the levelflat texture number
+			if (foundflats->basetexturenum != -1)
+				foundflats->texturenum = foundflats->basetexturenum + ((leveltime/foundflats->speed + foundflats->animseq) % foundflats->numpics);
 			// update the levelflat lump number
-			foundflats->lumpnum = foundflats->baselumpnum +
-				((leveltime/foundflats->speed + foundflats->animseq) % foundflats->numpics);
+			else if (foundflats->baselumpnum != LUMPERROR)
+				foundflats->lumpnum = foundflats->baselumpnum + ((leveltime/foundflats->speed + foundflats->animseq) % foundflats->numpics);
 		}
 	}
 }
diff --git a/src/r_data.c b/src/r_data.c
index 6889bdddebf587ce22b3526066fca43aec364ceb..5858117a53dfb74e39c018b54d64b2f32c92800e 100644
--- a/src/r_data.c
+++ b/src/r_data.c
@@ -40,6 +40,28 @@
 #include <errno.h>
 #endif
 
+#ifdef HAVE_PNG
+
+#ifndef _MSC_VER
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#endif
+
+#ifndef _LFS64_LARGEFILE
+#define _LFS64_LARGEFILE
+#endif
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 0
+#endif
+
+#include "png.h"
+#ifndef PNG_READ_SUPPORTED
+#undef HAVE_PNG
+#endif
+#endif
+
 //
 // Texture definition.
 // Each texture is composed of one or more patches,
@@ -98,12 +120,11 @@ INT32 numtextures = 0; // total number of textures found,
 // size of following tables
 
 texture_t **textures = NULL;
+textureflat_t *texflats = NULL;
 static UINT32 **texturecolumnofs; // column offset lookup table for each texture
 static UINT8 **texturecache; // graphics data for each generated full-size texture
 
-// texture width is a power of 2, so it can easily repeat along sidedefs using a simple mask
-INT32 *texturewidthmask;
-
+INT32 *texturewidth;
 fixed_t *textureheight; // needed for texture pegging
 
 INT32 *texturetranslation;
@@ -315,7 +336,7 @@ static inline void R_DrawTransFlippedColumnInCache(column_t *patch, UINT8 *cache
 // Allocate space for full size texture, either single patch or 'composite'
 // Build the full textures from patches.
 // The texture caching system is a little more hungry of memory, but has
-// been simplified for the sake of highcolor, dynamic ligthing, & speed.
+// been simplified for the sake of highcolor (lol), dynamic lighting, & speed.
 //
 // This is not optimised, but it's supposed to be executed only once
 // per level, when enough memory is available.
@@ -332,6 +353,10 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 	column_t *patchcol;
 	UINT32 *colofs;
 
+	UINT16 wadnum;
+	lumpnum_t lumpnum;
+	size_t lumplength;
+
 	I_Assert(texnum <= (size_t)numtextures);
 	texture = textures[texnum];
 	I_Assert(texture != NULL);
@@ -346,7 +371,19 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 	{
 		boolean holey = false;
 		patch = texture->patches;
-		realpatch = W_CacheLumpNumPwad(patch->wad, patch->lump, PU_CACHE);
+
+		wadnum = patch->wad;
+		lumpnum = patch->lump;
+		lumplength = W_LumpLengthPwad(wadnum, lumpnum);
+		realpatch = W_CacheLumpNumPwad(wadnum, lumpnum, PU_CACHE);
+
+#ifndef NO_PNG_LUMPS
+		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
+		{
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			goto multipatch;
+		}
+#endif
 
 		// Check the patch for holes.
 		if (texture->width > SHORT(realpatch->width) || texture->height > SHORT(realpatch->height))
@@ -376,7 +413,7 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 		{
 			texture->holes = true;
 			texture->flip = patch->flip;
-			blocksize = W_LumpLengthPwad(patch->wad, patch->lump);
+			blocksize = lumplength;
 			block = Z_Calloc(blocksize, PU_STATIC, // will change tag at end of this function
 				&texturecache[texnum]);
 			M_Memcpy(block, realpatch, blocksize);
@@ -403,6 +440,9 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 	}
 
 	// multi-patch textures (or 'composite')
+#ifndef NO_PNG_LUMPS
+	multipatch:
+#endif
 	texture->holes = false;
 	texture->flip = 0;
 	blocksize = (texture->width * 4) + (texture->width * texture->height);
@@ -433,7 +473,15 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 			ColumnDrawerPointer = (patch->flip & 2) ? R_DrawFlippedColumnInCache : R_DrawColumnInCache;
 		}
 
-		realpatch = W_CacheLumpNumPwad(patch->wad, patch->lump, PU_CACHE);
+		wadnum = patch->wad;
+		lumpnum = patch->lump;
+		lumplength = W_LumpLengthPwad(wadnum, lumpnum);
+		realpatch = W_CacheLumpNumPwad(wadnum, lumpnum, PU_CACHE);
+#ifndef NO_PNG_LUMPS
+		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+#endif
+
 		x1 = patch->originx;
 		width = SHORT(realpatch->width);
 		height = SHORT(realpatch->height);
@@ -509,10 +557,14 @@ void R_CheckTextureCache(INT32 tex)
 UINT8 *R_GetColumn(fixed_t tex, INT32 col)
 {
 	UINT8 *data;
+	INT32 width = texturewidth[tex];
 
-	col &= texturewidthmask[tex];
-	data = texturecache[tex];
+	if (width & (width - 1))
+		col = (UINT32)col % width;
+	else
+		col &= (width - 1);
 
+	data = texturecache[tex];
 	if (!data)
 		data = R_GenerateTexture(tex);
 
@@ -550,7 +602,7 @@ void R_ParseTEXTURESLump(UINT16 wadNum, UINT16 lumpNum, INT32 *index);
 #define TX_END "TX_END"
 void R_LoadTextures(void)
 {
-	INT32 i, k, w;
+	INT32 i, w;
 	UINT16 j;
 	UINT16 texstart, texend, texturesLumpPos;
 	patch_t *patchlump;
@@ -567,6 +619,7 @@ void R_LoadTextures(void)
 		}
 		Z_Free(texturetranslation);
 		Z_Free(textures);
+		Z_Free(texflats);
 	}
 
 	// Load patches and textures.
@@ -627,15 +680,16 @@ void R_LoadTextures(void)
 	// Allocate memory and initialize to 0 for all the textures we are initialising.
 	// There are actually 5 buffers allocated in one for convenience.
 	textures = Z_Calloc((numtextures * sizeof(void *)) * 5, PU_STATIC, NULL);
+	texflats = Z_Calloc((numtextures * sizeof(*texflats)), PU_STATIC, NULL);
 
 	// Allocate texture column offset table.
 	texturecolumnofs = (void *)((UINT8 *)textures + (numtextures * sizeof(void *)));
 	// Allocate texture referencing cache.
-	texturecache	 = (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 2));
-	// Allocate texture width mask table.
-	texturewidthmask = (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 3));
-	// Allocate texture height mask table.
-	textureheight	= (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 4));
+	texturecache     = (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 2));
+	// Allocate texture width table.
+	texturewidth     = (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 3));
+	// Allocate texture height table.
+	textureheight    = (void *)((UINT8 *)textures + ((numtextures * sizeof(void *)) * 4));
 	// Create translation table for global animation.
 	texturetranslation = Z_Malloc((numtextures + 1) * sizeof(*texturetranslation), PU_STATIC, NULL);
 
@@ -673,20 +727,39 @@ void R_LoadTextures(void)
 		// Work through each lump between the markers in the WAD.
 		for (j = 0; j < (texend - texstart); j++)
 		{
+			UINT16 wadnum = (UINT16)w;
+			lumpnum_t lumpnum = texstart + j;
+			size_t lumplength;
+
 			if (wadfiles[w]->type == RET_PK3)
 			{
-				if (W_IsLumpFolder((UINT16)w, texstart + j)) // Check if lump is a folder
+				if (W_IsLumpFolder(wadnum, lumpnum)) // Check if lump is a folder
 					continue; // If it is then SKIP IT
 			}
-			patchlump = W_CacheLumpNumPwad((UINT16)w, texstart + j, PU_CACHE);
+
+			lumplength = W_LumpLengthPwad(wadnum, lumpnum);
+			patchlump = W_CacheLumpNumPwad(wadnum, lumpnum, PU_CACHE);
 
 			//CONS_Printf("\n\"%s\" is a single patch, dimensions %d x %d",W_CheckNameForNumPwad((UINT16)w,texstart+j),patchlump->width, patchlump->height);
 			texture = textures[i] = Z_Calloc(sizeof(texture_t) + sizeof(texpatch_t), PU_STATIC, NULL);
 
 			// Set texture properties.
-			M_Memcpy(texture->name, W_CheckNameForNumPwad((UINT16)w, texstart + j), sizeof(texture->name));
-			texture->width = SHORT(patchlump->width);
-			texture->height = SHORT(patchlump->height);
+			M_Memcpy(texture->name, W_CheckNameForNumPwad(wadnum, lumpnum), sizeof(texture->name));
+
+#ifndef NO_PNG_LUMPS
+			if (R_IsLumpPNG((UINT8 *)patchlump, lumplength))
+			{
+				INT16 width, height;
+				R_PNGDimensions((UINT8 *)patchlump, &width, &height, lumplength);
+				texture->width = width;
+				texture->height = height;
+			}
+			else
+#endif
+			{
+				texture->width = SHORT(patchlump->width);
+				texture->height = SHORT(patchlump->height);
+			}
 			texture->patchcount = 1;
 			texture->holes = false;
 			texture->flip = 0;
@@ -701,11 +774,7 @@ void R_LoadTextures(void)
 
 			Z_Unlock(patchlump);
 
-			k = 1;
-			while (k << 1 <= texture->width)
-				k <<= 1;
-
-			texturewidthmask[i] = k - 1;
+			texturewidth[i] = texture->width;
 			textureheight[i] = texture->height << FRACBITS;
 			i++;
 		}
@@ -1097,7 +1166,7 @@ int R_CountTexturesInTEXTURESLump(UINT16 wadNum, UINT16 lumpNum)
 	texturesToken = M_GetToken(texturesText);
 	while (texturesToken != NULL)
 	{
-		if (stricmp(texturesToken, "WALLTEXTURE")==0)
+		if (stricmp(texturesToken, "WALLTEXTURE") == 0 || stricmp(texturesToken, "TEXTURE") == 0)
 		{
 			numTexturesInLump++;
 			Z_Free(texturesToken);
@@ -1105,7 +1174,7 @@ int R_CountTexturesInTEXTURESLump(UINT16 wadNum, UINT16 lumpNum)
 		}
 		else
 		{
-			I_Error("Error parsing TEXTURES lump: Expected \"WALLTEXTURE\", got \"%s\"",texturesToken);
+			I_Error("Error parsing TEXTURES lump: Expected \"WALLTEXTURE\" or \"TEXTURE\", got \"%s\"",texturesToken);
 		}
 		texturesToken = M_GetToken(NULL);
 	}
@@ -1146,21 +1215,21 @@ void R_ParseTEXTURESLump(UINT16 wadNum, UINT16 lumpNum, INT32 *texindex)
 	texturesToken = M_GetToken(texturesText);
 	while (texturesToken != NULL)
 	{
-		if (stricmp(texturesToken, "WALLTEXTURE")==0)
+		if (stricmp(texturesToken, "WALLTEXTURE") == 0 || stricmp(texturesToken, "TEXTURE") == 0)
 		{
 			Z_Free(texturesToken);
 			// Get the new texture
 			newTexture = R_ParseTexture(true);
 			// Store the new texture
 			textures[*texindex] = newTexture;
-			texturewidthmask[*texindex] = newTexture->width - 1;
+			texturewidth[*texindex] = newTexture->width;
 			textureheight[*texindex] = newTexture->height << FRACBITS;
 			// Increment i back in R_LoadTextures()
 			(*texindex)++;
 		}
 		else
 		{
-			I_Error("Error parsing TEXTURES lump: Expected \"WALLTEXTURE\", got \"%s\"",texturesToken);
+			I_Error("Error parsing TEXTURES lump: Expected \"WALLTEXTURE\" or \"TEXTURE\", got \"%s\"",texturesToken);
 		}
 		texturesToken = M_GetToken(NULL);
 	}
@@ -1267,6 +1336,41 @@ lumpnum_t R_GetFlatNumForName(const char *name)
 		lump = LUMPERROR;
 	}
 
+	// Detect textures
+	if (lump == LUMPERROR)
+	{
+		// Scan wad files backwards so patched textures take preference.
+		for (i = numwadfiles - 1; i >= 0; i--)
+		{
+			switch (wadfiles[i]->type)
+			{
+			case RET_WAD:
+				if ((start = W_CheckNumForNamePwad("TX_START", (UINT16)i, 0)) == INT16_MAX)
+					continue;
+				if ((end = W_CheckNumForNamePwad("TX_END", (UINT16)i, start)) == INT16_MAX)
+					continue;
+				break;
+			case RET_PK3:
+				if ((start = W_CheckNumForFolderStartPK3("Textures/", i, 0)) == INT16_MAX)
+					continue;
+				if ((end = W_CheckNumForFolderEndPK3("Textures/", i, start)) == INT16_MAX)
+					continue;
+				break;
+			default:
+				continue;
+			}
+
+			// Now find lump with specified name in that range.
+			lump = W_CheckNumForNamePwad(name, (UINT16)i, start);
+			if (lump < end)
+			{
+				lump += (i<<16); // found it, in our constraints
+				break;
+			}
+			lump = LUMPERROR;
+		}
+	}
+
 	if (lump == LUMPERROR)
 	{
 		if (strcmp(name, SKYFLATNAME))
@@ -1615,7 +1719,6 @@ extracolormap_t *R_ColormapForName(char *name)
 //
 static double deltas[256][3], map[256][3];
 
-static UINT8 NearestColor(UINT8 r, UINT8 g, UINT8 b);
 static int RoundUp(double number);
 
 lighttable_t *R_CreateLightTable(extracolormap_t *extra_colormap)
@@ -2027,7 +2130,7 @@ extracolormap_t *R_AddColormaps(extracolormap_t *exc_augend, extracolormap_t *ex
 
 // Thanks to quake2 source!
 // utils3/qdata/images.c
-static UINT8 NearestColor(UINT8 r, UINT8 g, UINT8 b)
+UINT8 NearestColor(UINT8 r, UINT8 g, UINT8 b)
 {
 	int dr, dg, db;
 	int distortion, bestdistortion = 256 * 256 * 4, bestcolor = 0, i;
@@ -2306,3 +2409,479 @@ void R_PrecacheLevel(void)
 			"texturememory: %s k\n"
 			"spritememory:  %s k\n", sizeu1(flatmemory>>10), sizeu2(texturememory>>10), sizeu3(spritememory>>10));
 }
+
+// Returns whether `lump` looks like a Doom-format patch, judging by its size, dimensions and column offsets.
+// Ref: https://github.com/coelckers/prboom-plus/blob/master/prboom2/src/r_patch.c#L350
+boolean R_CheckIfPatch(lumpnum_t lump)
+{
+	size_t size = W_LumpLength(lump);
+	INT16 width, height;
+	patch_t *patch;
+	boolean result;
+
+	// minimum length of a valid Doom patch
+	if (size < 13)
+		return false;
+
+	patch = (patch_t *)W_CacheLumpNum(lump, PU_STATIC);
+
+	width = SHORT(patch->width);
+	height = SHORT(patch->height);
+
+	// Compare in size_t: narrowing (size / 4) to INT16 misjudges lumps of 128 KiB and up.
+	result = (height > 0 && height <= 16384 && width > 0 && width <= 16384 && (size_t)width < size / 4);
+
+	if (result)
+	{
+		// The dimensions seem like they might be valid for a patch, so
+		// check the column directory for extra security. All columns
+		// must begin after the column directory, and none of them must
+		// point past the end of the patch.
+		INT16 x;
+
+		for (x = 0; x < width; x++)
+		{
+			UINT32 ofs = LONG(patch->columnofs[x]);
+
+			// Need one byte for an empty column (but there's patches that don't know that!)
+			if (ofs < (UINT32)width * 4 + 8 || ofs >= (UINT32)size)
+			{
+				result = false;
+				break;
+			}
+		}
+	}
+
+	return result;
+}
+
+void R_PatchToFlat(patch_t *patch, UINT8 *flat)
+{
+	fixed_t col, ofs;
+	column_t *column;
+	UINT8 *desttop, *dest, *deststop;
+	UINT8 *source;
+
+	desttop = flat;
+	deststop = desttop + (SHORT(patch->width) * SHORT(patch->height));
+
+	for (col = 0; col < SHORT(patch->width); col++, desttop++)
+	{
+		INT32 topdelta, prevdelta = -1;
+		column = (column_t *)((UINT8 *)patch + LONG(patch->columnofs[col]));
+
+		while (column->topdelta != 0xff)
+		{
+			topdelta = column->topdelta;
+			if (topdelta <= prevdelta)
+				topdelta += prevdelta;
+			prevdelta = topdelta;
+
+			dest = desttop + (topdelta * SHORT(patch->width));
+			source = (UINT8 *)(column) + 3;
+			for (ofs = 0; dest < deststop && ofs < column->length; ofs++)
+			{
+				*dest = source[ofs];
+				dest += SHORT(patch->width);
+			}
+			column = (column_t *)((UINT8 *)column + column->length + 4);
+		}
+	}
+}
+
+#ifndef NO_PNG_LUMPS
+boolean R_IsLumpPNG(UINT8 *d, size_t s)
+{
+	// The 8-byte PNG file signature, compared with memcmp since that may be faster on
+	// CPUs with slow unaligned access. Ref: http://www.libpng.org/pub/png/spec/1.2/PNG-Rationale.html#R.PNG-file-signature
+	static const UINT8 pngsignature[8] = {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a};
+
+	// Lumps shorter than 67 bytes are rejected (see http://garethrees.org/2007/11/14/pngcrush/)
+	return (s >= 67 && memcmp(d, pngsignature, sizeof pngsignature) == 0);
+}
+
+#ifdef HAVE_PNG
+typedef struct {
+	png_bytep buffer;
+	png_uint_32 bufsize;
+	png_uint_32 current_pos;
+} png_ioread;
+
+static void PNG_IOReader(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+	png_ioread *f = png_get_io_ptr(png_ptr);
+	if (length > (f->bufsize - f->current_pos))
+		png_error(png_ptr, "PNG_IOReader: buffer overrun");
+	memcpy(data, f->buffer + f->current_pos, length);
+	f->current_pos += length;
+}
+
+// libpng error callback: only logs. It returns, so libpng falls back to its own default error handling.
+static void PNG_error(png_structp PNG, png_const_charp pngtext)
+{
+	CONS_Debug(DBG_RENDER, "libpng error at %p: %s\n", (void *)PNG, pngtext);
+}
+
+static void PNG_warn(png_structp PNG, png_const_charp pngtext) // libpng warning callback: just logs the message
+{
+	CONS_Debug(DBG_RENDER, "libpng warning at %p: %s\n", (void *)PNG, pngtext);
+}
+
+static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+{
+	png_structp png_ptr;
+	png_infop png_info_ptr;
+	png_uint_32 width, height;
+	int bit_depth, color_type;
+	png_uint_32 y;
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+	jmp_buf jmpbuf;
+#endif
+#endif
+
+	png_ioread png_io;
+	png_bytep *row_pointers;
+
+	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL,
+		PNG_error, PNG_warn);
+	if (!png_ptr)
+	{
+		CONS_Debug(DBG_RENDER, "PNG_Load: Error on initialize libpng\n");
+		return NULL;
+	}
+
+	png_info_ptr = png_create_info_struct(png_ptr);
+	if (!png_info_ptr)
+	{
+		CONS_Debug(DBG_RENDER, "PNG_Load: Error on allocate for libpng\n");
+		png_destroy_read_struct(&png_ptr, NULL, NULL);
+		return NULL;
+	}
+
+#ifdef USE_FAR_KEYWORD
+	if (setjmp(jmpbuf))
+#else
+	if (setjmp(png_jmpbuf(png_ptr)))
+#endif
+	{
+		//CONS_Debug(DBG_RENDER, "libpng load error on %s\n", filename);
+		png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
+		return NULL;
+	}
+#ifdef USE_FAR_KEYWORD
+	png_memcpy(png_jmpbuf(png_ptr), jmpbuf, sizeof jmp_buf);
+#endif
+
+	// set our own read_function
+	png_io.buffer = (png_bytep)png;
+	png_io.bufsize = size;
+	png_io.current_pos = 0;
+	png_set_read_fn(png_ptr, &png_io, PNG_IOReader);
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+	png_set_user_limits(png_ptr, 2048, 2048);
+#endif
+
+	png_read_info(png_ptr, png_info_ptr);
+
+	png_get_IHDR(png_ptr, png_info_ptr, &width, &height, &bit_depth, &color_type,
+	 NULL, NULL, NULL);
+
+	if (bit_depth == 16)
+		png_set_strip_16(png_ptr);
+
+	if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+		png_set_gray_to_rgb(png_ptr);
+	else if (color_type == PNG_COLOR_TYPE_PALETTE)
+		png_set_palette_to_rgb(png_ptr);
+
+	if (png_get_valid(png_ptr, png_info_ptr, PNG_INFO_tRNS))
+		png_set_tRNS_to_alpha(png_ptr);
+	else if (color_type != PNG_COLOR_TYPE_RGB_ALPHA && color_type != PNG_COLOR_TYPE_GRAY_ALPHA)
+	{
+#if PNG_LIBPNG_VER < 10207
+		png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER);
+#else
+		png_set_add_alpha(png_ptr, 0xFF, PNG_FILLER_AFTER);
+#endif
+	}
+
+	png_read_update_info(png_ptr, png_info_ptr);
+
+	// Read the image
+	row_pointers = (png_bytep*)malloc(sizeof(png_bytep) * height);
+	for (y = 0; y < height; y++)
+		row_pointers[y] = (png_byte*)malloc(png_get_rowbytes(png_ptr, png_info_ptr));
+	png_read_image(png_ptr, row_pointers);
+	png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
+
+	*w = (INT32)width;
+	*h = (INT32)height;
+	return row_pointers;
+}
+
+// Convert a PNG to a raw image: a PU_LEVEL block of (*w) * (*h) palette indices, stored row by row.
+static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+{
+	UINT8 *flat;
+	png_uint_32 x, y;
+	png_bytep *row_pointers = PNG_Read(png, w, h, size);
+	png_uint_32 width = *w, height = *h;
+
+	if (!row_pointers)
+		I_Error("PNG_RawConvert: conversion failed");
+
+	// Convert the image to 8bpp; pixels with zero alpha stay TRANSPARENTPIXEL
+	flat = Z_Malloc(width * height, PU_LEVEL, NULL);
+	memset(flat, TRANSPARENTPIXEL, width * height);
+	for (y = 0; y < height; y++)
+	{
+		png_bytep row = row_pointers[y];
+		for (x = 0; x < width; x++)
+		{
+			png_bytep px = &(row[x * 4]);
+			if ((UINT8)px[3])
+				flat[((y * width) + x)] = NearestColor((UINT8)px[0], (UINT8)px[1], (UINT8)px[2]);
+		}
+		free(row); // PNG_Read malloc()s every row separately, so release each one
+	}
+	free(row_pointers);
+	return flat;
+}
+
+// Convert a PNG to a flat.
+UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size)
+{
+	return PNG_RawConvert(png, &levelflat->width, &levelflat->height, size);
+}
+
+// Convert a PNG to a patch. Built in the static imgbuf (so not reentrant); returns a malloc()ed copy that nothing here frees.
+static unsigned char imgbuf[1<<26];
+patch_t *R_PNGToPatch(UINT8 *png, size_t size)
+{
+	UINT16 width, height;
+	UINT8 *raw = PNG_RawConvert(png, &width, &height, size);
+
+	UINT32 x, y;
+	UINT8 *img;
+	UINT8 *imgptr = imgbuf;
+	UINT8 *colpointers, *startofspan;
+
+	#define WRITE8(buf, a) do {*buf = (a); buf++;} while (0)
+	#define WRITE16(buf, a) do {*buf = (a)&255; buf++; *buf = (a)>>8; buf++;} while (0)
+	#define WRITE32(buf, a) do {WRITE16(buf, (a)&65535); WRITE16(buf, (a)>>16);} while (0)
+
+	if (!raw)
+		I_Error("R_PNGToPatch: conversion failed");
+
+	// Write image size and offset
+	WRITE16(imgptr, width);
+	WRITE16(imgptr, height);
+	// no offsets
+	WRITE16(imgptr, 0);
+	WRITE16(imgptr, 0);
+
+	// Leave placeholder to column pointers
+	colpointers = imgptr;
+	imgptr += width*4;
+
+	// Write columns
+	for (x = 0; x < width; x++)
+	{
+		int lastStartY = 0;
+		int spanSize = 0;
+		startofspan = NULL;
+
+		// Write column pointer: offset from the start of the patch (@TODO may be wrong)
+		WRITE32(colpointers, imgptr - imgbuf);
+
+		// Write pixels
+		for (y = 0; y < height; y++)
+		{
+			UINT8 paletteIndex = raw[((y * width) + x)];
+
+			// Start new column if we need to
+			if (!startofspan || spanSize == 255)
+			{
+				int writeY = y;
+
+				// If we reached the span size limit, finish the previous span
+				if (startofspan)
+					WRITE8(imgptr, 0);
+
+				if (y > 254)
+				{
+					// Make sure we're aligned to 254
+					if (lastStartY < 254)
+					{
+						WRITE8(imgptr, 254);
+						WRITE8(imgptr, 0);
+						imgptr += 2;
+						lastStartY = 254;
+					}
+
+					// Write stopgap empty spans if needed
+					writeY = y - lastStartY;
+
+					while (writeY > 254)
+					{
+						WRITE8(imgptr, 254);
+						WRITE8(imgptr, 0);
+						imgptr += 2;
+						writeY -= 254;
+					}
+				}
+
+				startofspan = imgptr;
+				WRITE8(imgptr, writeY);///@TODO calculate starting y pos
+				imgptr += 2;
+				spanSize = 0;
+
+				lastStartY = y;
+			}
+
+			// Write the pixel
+			WRITE8(imgptr, paletteIndex);
+			spanSize++;
+			startofspan[1] = spanSize;
+		}
+
+		if (startofspan)
+			WRITE8(imgptr, 0);
+
+		WRITE8(imgptr, 0xFF);
+	}
+
+	#undef WRITE8
+	#undef WRITE16
+	#undef WRITE32
+
+	size = imgptr-imgbuf;
+	img = malloc(size);
+	if (!img)
+		I_Error("R_PNGToPatch: out of memory");
+	memcpy(img, imgbuf, size);
+	Z_Free(raw);
+
+	return (patch_t *)img;
+}
+
+boolean R_PNGDimensions(UINT8 *png, INT16 *width, INT16 *height, size_t size)
+{
+	png_structp png_ptr;
+	png_infop png_info_ptr;
+	png_uint_32 w, h;
+	int bit_depth, color_type;
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+	jmp_buf jmpbuf;
+#endif
+#endif
+
+	png_ioread png_io;
+
+	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL,
+		PNG_error, PNG_warn);
+	if (!png_ptr)
+	{
+		CONS_Debug(DBG_RENDER, "PNG_Load: Error on initialize libpng\n");
+		return false;
+	}
+
+	png_info_ptr = png_create_info_struct(png_ptr);
+	if (!png_info_ptr)
+	{
+		CONS_Debug(DBG_RENDER, "PNG_Load: Error on allocate for libpng\n");
+		png_destroy_read_struct(&png_ptr, NULL, NULL);
+		return false;
+	}
+
+#ifdef USE_FAR_KEYWORD
+	if (setjmp(jmpbuf))
+#else
+	if (setjmp(png_jmpbuf(png_ptr)))
+#endif
+	{
+		//CONS_Debug(DBG_RENDER, "libpng load error on %s\n", filename);
+		png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
+		return false;
+	}
+#ifdef USE_FAR_KEYWORD
+	png_memcpy(png_jmpbuf(png_ptr), jmpbuf, sizeof jmp_buf);
+#endif
+
+	// set our own read_function
+	png_io.buffer = (png_bytep)png;
+	png_io.bufsize = size;
+	png_io.current_pos = 0;
+	png_set_read_fn(png_ptr, &png_io, PNG_IOReader);
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+	png_set_user_limits(png_ptr, 2048, 2048);
+#endif
+
+	png_read_info(png_ptr, png_info_ptr);
+
+	png_get_IHDR(png_ptr, png_info_ptr, &w, &h, &bit_depth, &color_type,
+	 NULL, NULL, NULL);
+
+	// okay done. stop.
+	png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
+
+	*width = (INT32)w;
+	*height = (INT32)h;
+	return true;
+}
+#endif
+#endif
+
+void R_TextureToFlat(size_t tex, UINT8 *flat)
+{
+	texture_t *texture = textures[tex];
+
+	fixed_t col, ofs;
+	column_t *column;
+	UINT8 *desttop, *dest, *deststop;
+	UINT8 *source;
+
+	desttop = flat;
+	deststop = desttop + (texture->width * texture->height);
+
+	for (col = 0; col < texture->width; col++, desttop++)
+	{
+		column = (column_t *)R_GetColumn(tex, col);
+		if (!texture->holes)
+		{
+			dest = desttop;
+			source = (UINT8 *)(column);
+			for (ofs = 0; dest < deststop && ofs < texture->height; ofs++)
+			{
+				if (source[ofs] != TRANSPARENTPIXEL)
+					*dest = source[ofs];
+				dest += texture->width;
+			}
+		}
+		else
+		{
+			INT32 topdelta, prevdelta = -1;
+			while (column->topdelta != 0xff)
+			{
+				topdelta = column->topdelta;
+				if (topdelta <= prevdelta)
+					topdelta += prevdelta;
+				prevdelta = topdelta;
+
+				dest = desttop + (topdelta * texture->width);
+				source = (UINT8 *)(column) + 3;
+				for (ofs = 0; dest < deststop && ofs < column->length; ofs++)
+				{
+					if (source[ofs] != TRANSPARENTPIXEL)
+						*dest = source[ofs];
+					dest += texture->width;
+				}
+				column = (column_t *)((UINT8 *)column + column->length + 4);
+			}
+		}
+	}
+}
diff --git a/src/r_data.h b/src/r_data.h
index b6b0a16a15fad21fb90aa0325721d177a1c6eccc..b29bf4557aba12628e53d49336ead3115a7b1c16 100644
--- a/src/r_data.h
+++ b/src/r_data.h
@@ -16,6 +16,7 @@
 
 #include "r_defs.h"
 #include "r_state.h"
+#include "p_setup.h" // levelflats
 
 #ifdef __GNUG__
 #pragma interface
@@ -55,12 +56,17 @@ typedef struct
 	texpatch_t patches[0];
 } texture_t;
 
+typedef struct
+{
+	UINT8 *flat; // byte buffer; NOTE(review): presumably a texture unpacked by R_TextureToFlat - confirm against the code that fills texflats
+	INT16 width, height; // presumably the dimensions of flat in pixels - TODO confirm
+} textureflat_t;
+
 // all loaded and prepared textures from the start of the game
 extern texture_t **textures;
+extern textureflat_t *texflats;
 
-// texture width is a power of 2, so it can easily repeat along sidedefs using a simple mask
-extern INT32 *texturewidthmask;
-
+extern INT32 *texturewidth;
 extern fixed_t *textureheight; // needed for texture pegging
 
 extern INT16 color8to16[256]; // remap color index to highcolor
@@ -88,7 +94,6 @@ void R_PrecacheLevel(void);
 // Floor/ceiling opaque texture tiles,
 // lookup by name. For animation?
 lumpnum_t R_GetFlatNumForName(const char *name);
-#define R_FlatNumForName(x) R_GetFlatNumForName(x)
 
 // Called by P_Ticker for switches and animations,
 // returns the texture number for the texture name.
@@ -148,6 +153,20 @@ const char *R_NameForColormap(extracolormap_t *extra_colormap);
 #define R_PutRgbaRGB(r, g, b) (R_PutRgbaR(r) + R_PutRgbaG(g) + R_PutRgbaB(b))
 #define R_PutRgbaRGBA(r, g, b, a) (R_PutRgbaRGB(r, g, b) + R_PutRgbaA(a))
 
+boolean R_CheckIfPatch(lumpnum_t lump);
+UINT8 NearestColor(UINT8 r, UINT8 g, UINT8 b);
+
+void R_PatchToFlat(patch_t *patch, UINT8 *flat);
+void R_TextureToFlat(size_t tex, UINT8 *flat);
+
+#ifndef NO_PNG_LUMPS
+boolean R_IsLumpPNG(UINT8 *d, size_t s);
+
+UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size);
+patch_t *R_PNGToPatch(UINT8 *png, size_t size);
+boolean R_PNGDimensions(UINT8 *png, INT16 *width, INT16 *height, size_t size);
+#endif
+
 extern INT32 numtextures;
 
 #endif
diff --git a/src/r_draw.c b/src/r_draw.c
index 396ed034457122d9f6f1e917a30b1485d6457491..1754403c44141a2ca599bddcc28d0ca6ed90ca80 100644
--- a/src/r_draw.c
+++ b/src/r_draw.c
@@ -99,6 +99,8 @@ INT32 dc_numlights = 0, dc_maxlights, dc_texheight;
 INT32 ds_y, ds_x1, ds_x2;
 lighttable_t *ds_colormap;
 fixed_t ds_xfrac, ds_yfrac, ds_xstep, ds_ystep;
+UINT16 ds_flatwidth, ds_flatheight;
+boolean ds_powersoftwo;
 
 UINT8 *ds_source; // start of a 64*64 tile image
 UINT8 *ds_transmap; // one of the translucency tables
diff --git a/src/r_draw.h b/src/r_draw.h
index 82498eb11e22d30569e69b2baca059d1acbeb40e..3c142972257c446a29e32fd1c36a810e47c63bc0 100644
--- a/src/r_draw.h
+++ b/src/r_draw.h
@@ -57,7 +57,9 @@ extern INT32 dc_texheight;
 extern INT32 ds_y, ds_x1, ds_x2;
 extern lighttable_t *ds_colormap;
 extern fixed_t ds_xfrac, ds_yfrac, ds_xstep, ds_ystep;
-extern UINT8 *ds_source; // start of a 64*64 tile image
+extern UINT16 ds_flatwidth, ds_flatheight;
+extern boolean ds_powersoftwo;
+extern UINT8 *ds_source;
 extern UINT8 *ds_transmap;
 
 #ifdef ESLOPE
@@ -128,6 +130,8 @@ void R_FillBackScreen(void);
 void R_DrawViewBorder(void);
 #endif
 
+#define TRANSPARENTPIXEL 255
+
 // -----------------
 // 8bpp DRAWING CODE
 // -----------------
@@ -169,6 +173,13 @@ void R_DrawFogSpan_8(void);
 void R_DrawFogColumn_8(void);
 void R_DrawColumnShadowed_8(void);
 
+#ifndef NOWATER
+void R_DrawTranslucentWaterSpan_8(void);
+
+extern INT32 ds_bgofs;
+extern INT32 ds_waterofs;
+#endif
+
 // ------------------
 // 16bpp DRAWING CODE
 // ------------------
diff --git a/src/r_draw8.c b/src/r_draw8.c
index 8a2d37fb3d796d81b7bf1a470d8bfa93971f3d0e..77406f83c6d6420835a6cf11de32e7a7edb27441 100644
--- a/src/r_draw8.c
+++ b/src/r_draw8.c
@@ -105,8 +105,6 @@ void R_DrawColumn_8(void)
 	}
 }
 
-#define TRANSPARENTPIXEL 255
-
 void R_Draw2sMultiPatchColumn_8(void)
 {
 	INT32 count;
@@ -543,16 +541,19 @@ void R_DrawTranslatedColumn_8(void)
 */
 void R_DrawSpan_8 (void)
 {
-	UINT32 xposition;
-	UINT32 yposition;
-	UINT32 xstep, ystep;
+	fixed_t xposition;
+	fixed_t yposition;
+	fixed_t xstep, ystep;
 
 	UINT8 *source;
 	UINT8 *colormap;
 	UINT8 *dest;
 	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
 
-	size_t count;
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
 
 	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
 	// can be used for the fraction part. This allows calculation of the memory address in the
@@ -561,62 +562,88 @@ void R_DrawSpan_8 (void)
 	// bit per power of two (obviously)
 	// Ok, because I was able to eliminate the variable spot below, this function is now FASTER
 	// than the original span renderer. Whodathunkit?
-	xposition = ds_xfrac << nflatshiftup; yposition = ds_yfrac << nflatshiftup;
-	xstep = ds_xstep << nflatshiftup; ystep = ds_ystep << nflatshiftup;
+	if (ds_powersoftwo)
+	{
+		xposition <<= nflatshiftup; yposition <<= nflatshiftup;
+		xstep <<= nflatshiftup; ystep <<= nflatshiftup;
+	}
 
 	source = ds_source;
 	colormap = ds_colormap;
 	dest = ylookup[ds_y] + columnofs[ds_x1];
-	count = ds_x2 - ds_x1 + 1;
 
 	if (dest+8 > deststop)
 		return;
 
-	while (count >= 8)
+	if (!ds_powersoftwo)
 	{
-		// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
-		// have the uber complicated math to calculate it now, so that was a memory write we didn't
-		// need!
-		dest[0] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[1] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[2] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[3] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[4] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[5] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[6] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[7] = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest += 8;
-		count -= 8;
+		while (count-- && dest <= deststop)
+		{
+			fixed_t x = (xposition >> FRACBITS);
+			fixed_t y = (yposition >> FRACBITS);
+
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			*dest++ = colormap[source[((y * ds_flatwidth) + x)]];
+			xposition += xstep;
+			yposition += ystep;
+		}
 	}
-	while (count-- && dest <= deststop)
+	else
 	{
-		*dest++ = colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]];
-		xposition += xstep;
-		yposition += ystep;
+		while (count >= 8)
+		{
+			// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
+			// have the uber complicated math to calculate it now, so that was a memory write we didn't
+			// need!
+			dest[0] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[1] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[2] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[3] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[4] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[5] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[6] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[7] = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest += 8;
+			count -= 8;
+		}
+		while (count-- && dest <= deststop)
+		{
+			*dest++ = colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]];
+			xposition += xstep;
+			yposition += ystep;
+		}
 	}
 }
 
@@ -697,7 +724,24 @@ void R_DrawTiltedSpan_8(void)
 
 		colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
 
-		*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
+		if (!ds_powersoftwo)
+		{
+			fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+			fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			*dest = colormap[source[((y * ds_flatwidth) + x)]];
+		}
+		else
+			*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
 		dest++;
 		iz += ds_sz.x;
 		uz += ds_su.x;
@@ -734,7 +778,24 @@ void R_DrawTiltedSpan_8(void)
 		for (i = SPANSIZE-1; i >= 0; i--)
 		{
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				*dest = colormap[source[((y * ds_flatwidth) + x)]];
+			}
+			else
+				*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
 			dest++;
 			u += stepu;
 			v += stepv;
@@ -750,7 +811,24 @@ void R_DrawTiltedSpan_8(void)
 			u = (INT64)(startu);
 			v = (INT64)(startv);
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				*dest = colormap[source[((y * ds_flatwidth) + x)]];
+			}
+			else
+				*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
 		}
 		else
 		{
@@ -771,7 +849,24 @@ void R_DrawTiltedSpan_8(void)
 			for (; width != 0; width--)
 			{
 				colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-				*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
+				if (!ds_powersoftwo)
+				{
+					fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+					fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+					// Carefully align all of my Friends.
+					if (x < 0)
+						x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+					if (y < 0)
+						y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+					x %= ds_flatwidth;
+					y %= ds_flatheight;
+
+					*dest = colormap[source[((y * ds_flatwidth) + x)]];
+				}
+				else
+					*dest = colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]];
 				dest++;
 				u += stepu;
 				v += stepv;
@@ -832,7 +927,24 @@ void R_DrawTiltedTranslucentSpan_8(void)
 		v = (INT64)(vz*z) + viewy;
 
 		colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-		*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
+		if (!ds_powersoftwo)
+		{
+			fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+			fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
+		}
+		else
+			*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
 		dest++;
 		iz += ds_sz.x;
 		uz += ds_su.x;
@@ -869,7 +981,24 @@ void R_DrawTiltedTranslucentSpan_8(void)
 		for (i = SPANSIZE-1; i >= 0; i--)
 		{
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
+			}
+			else
+				*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
 			dest++;
 			u += stepu;
 			v += stepv;
@@ -885,7 +1014,24 @@ void R_DrawTiltedTranslucentSpan_8(void)
 			u = (INT64)(startu);
 			v = (INT64)(startv);
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
+			}
+			else
+				*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
 		}
 		else
 		{
@@ -906,7 +1052,24 @@ void R_DrawTiltedTranslucentSpan_8(void)
 			for (; width != 0; width--)
 			{
 				colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-				*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
+				if (!ds_powersoftwo)
+				{
+					fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+					fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+					// Carefully align all of my Friends.
+					if (x < 0)
+						x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+					if (y < 0)
+						y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+					x %= ds_flatwidth;
+					y %= ds_flatheight;
+
+					*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
+				}
+				else
+					*dest = *(ds_transmap + (colormap[source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)]] << 8) + *dest);
 				dest++;
 				u += stepu;
 				v += stepv;
@@ -967,9 +1130,28 @@ void R_DrawTiltedSplat_8(void)
 
 		colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
 
-		val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
+		if (!ds_powersoftwo)
+		{
+			fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+			fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			val = source[((y * ds_flatwidth) + x)];
+		}
+		else
+			val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
+
 		if (val != TRANSPARENTPIXEL)
 			*dest = colormap[val];
+
 		dest++;
 		iz += ds_sz.x;
 		uz += ds_su.x;
@@ -1006,7 +1188,24 @@ void R_DrawTiltedSplat_8(void)
 		for (i = SPANSIZE-1; i >= 0; i--)
 		{
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				val = source[((y * ds_flatwidth) + x)];
+			}
+			else
+				val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
 			if (val != TRANSPARENTPIXEL)
 				*dest = colormap[val];
 			dest++;
@@ -1024,7 +1223,24 @@ void R_DrawTiltedSplat_8(void)
 			u = (INT64)(startu);
 			v = (INT64)(startv);
 			colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-			val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
+			if (!ds_powersoftwo)
+			{
+				fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+				fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+				// Carefully align all of my Friends.
+				if (x < 0)
+					x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+				if (y < 0)
+					y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+				x %= ds_flatwidth;
+				y %= ds_flatheight;
+
+				val = source[((y * ds_flatwidth) + x)];
+			}
+			else
+				val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
 			if (val != TRANSPARENTPIXEL)
 				*dest = colormap[val];
 		}
@@ -1048,6 +1264,24 @@ void R_DrawTiltedSplat_8(void)
 			{
 				colormap = planezlight[tiltlighting[ds_x1++]] + (ds_colormap - colormaps);
-				val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
+				// Fetch the texel exactly once: non-power-of-two flats wrap x/y by hand, power-of-two flats use the shift/mask lookup.
+				if (!ds_powersoftwo)
+				{
+					fixed_t x = (((fixed_t)u-viewx) >> FRACBITS);
+					fixed_t y = (((fixed_t)v-viewy) >> FRACBITS);
+
+					// Carefully align all of my Friends.
+					if (x < 0)
+						x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+					if (y < 0)
+						y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+					x %= ds_flatwidth;
+					y %= ds_flatheight;
+
+					val = source[((y * ds_flatwidth) + x)];
+				}
+				else
+					val = source[((v >> nflatyshift) & nflatmask) | (u >> nflatxshift)];
 				if (val != TRANSPARENTPIXEL)
 					*dest = colormap[val];
 				dest++;
@@ -1065,17 +1299,21 @@ void R_DrawTiltedSplat_8(void)
 */
 void R_DrawSplat_8 (void)
 {
-	UINT32 xposition;
-	UINT32 yposition;
-	UINT32 xstep, ystep;
+	fixed_t xposition;
+	fixed_t yposition;
+	fixed_t xstep, ystep;
 
 	UINT8 *source;
 	UINT8 *colormap;
 	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
 
-	size_t count;
+	size_t count = (ds_x2 - ds_x1 + 1);
 	UINT32 val;
 
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
+
 	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
 	// can be used for the fraction part. This allows calculation of the memory address in the
 	// texture with two shifts, an OR and one AND. (see below)
@@ -1083,99 +1321,125 @@ void R_DrawSplat_8 (void)
 	// bit per power of two (obviously)
 	// Ok, because I was able to eliminate the variable spot below, this function is now FASTER
 	// than the original span renderer. Whodathunkit?
-	xposition = ds_xfrac << nflatshiftup; yposition = ds_yfrac << nflatshiftup;
-	xstep = ds_xstep << nflatshiftup; ystep = ds_ystep << nflatshiftup;
+	if (ds_powersoftwo)
+	{
+		xposition <<= nflatshiftup; yposition <<= nflatshiftup;
+		xstep <<= nflatshiftup; ystep <<= nflatshiftup;
+	}
 
 	source = ds_source;
 	colormap = ds_colormap;
 	dest = ylookup[ds_y] + columnofs[ds_x1];
-	count = ds_x2 - ds_x1 + 1;
 
-	while (count >= 8)
+	if (!ds_powersoftwo)
 	{
-		// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
-		// have the uber complicated math to calculate it now, so that was a memory write we didn't
-		// need!
-		//
-		// <Callum> 4194303 = (2048x2048)-1 (2048x2048 is maximum flat size)
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[0] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+		while (count-- && dest <= deststop)
+		{
+			fixed_t x = (xposition >> FRACBITS);
+			fixed_t y = (yposition >> FRACBITS);
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[1] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[2] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[3] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			val = source[((y * ds_flatwidth) + x)];
+			if (val != TRANSPARENTPIXEL)
+				*dest = colormap[val];
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
+	}
+	else
+	{
+		while (count >= 8)
+		{
+			// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
+			// have the uber complicated math to calculate it now, so that was a memory write we didn't
+			// need!
+			//
+			// <Callum> 4194303 = (2048x2048)-1 (2048x2048 is maximum flat size)
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[0] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[4] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[1] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[5] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[2] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[6] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[3] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			dest[7] = colormap[val];
-		xposition += xstep;
-		yposition += ystep;
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[4] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		dest += 8;
-		count -= 8;
-	}
-	while (count--)
-	{
-		val = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift);
-		val &= 4194303;
-		val = source[val];
-		if (val != TRANSPARENTPIXEL)
-			*dest = colormap[val];
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[5] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
 
-		dest++;
-		xposition += xstep;
-		yposition += ystep;
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[6] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
+
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			val &= 4194303;
+			val = source[val];
+			if (val != TRANSPARENTPIXEL)
+				dest[7] = colormap[val];
+			xposition += xstep;
+			yposition += ystep;
+
+			dest += 8;
+			count -= 8;
+		}
+		while (count-- && dest <= deststop)
+		{
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				*dest = colormap[val];
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
 	}
 }
 
@@ -1184,16 +1448,20 @@ void R_DrawSplat_8 (void)
 */
 void R_DrawTranslucentSplat_8 (void)
 {
-	UINT32 xposition;
-	UINT32 yposition;
-	UINT32 xstep, ystep;
+	fixed_t xposition;
+	fixed_t yposition;
+	fixed_t xstep, ystep;
 
 	UINT8 *source;
 	UINT8 *colormap;
 	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
 
-	size_t count;
-	UINT8 val;
+	size_t count = (ds_x2 - ds_x1 + 1);
+	UINT32 val;
+
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
 
 	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
 	// can be used for the fraction part. This allows calculation of the memory address in the
@@ -1202,79 +1470,107 @@ void R_DrawTranslucentSplat_8 (void)
 	// bit per power of two (obviously)
 	// Ok, because I was able to eliminate the variable spot below, this function is now FASTER
 	// than the original span renderer. Whodathunkit?
-	xposition = ds_xfrac << nflatshiftup; yposition = ds_yfrac << nflatshiftup;
-	xstep = ds_xstep << nflatshiftup; ystep = ds_ystep << nflatshiftup;
+	if (ds_powersoftwo)
+	{
+		xposition <<= nflatshiftup; yposition <<= nflatshiftup;
+		xstep <<= nflatshiftup; ystep <<= nflatshiftup;
+	}
 
 	source = ds_source;
 	colormap = ds_colormap;
 	dest = ylookup[ds_y] + columnofs[ds_x1];
-	count = ds_x2 - ds_x1 + 1;
 
-	while (count >= 8)
+	if (!ds_powersoftwo)
 	{
-		// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
-		// have the uber complicated math to calculate it now, so that was a memory write we didn't
-		// need!
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[0] = *(ds_transmap + (colormap[val] << 8) + dest[0]);
-		xposition += xstep;
-		yposition += ystep;
+		while (count-- && dest <= deststop)
+		{
+			fixed_t x = (xposition >> FRACBITS);
+			fixed_t y = (yposition >> FRACBITS);
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[1] = *(ds_transmap + (colormap[val] << 8) + dest[1]);
-		xposition += xstep;
-		yposition += ystep;
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[2] = *(ds_transmap + (colormap[val] << 8) + dest[2]);
-		xposition += xstep;
-		yposition += ystep;
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[3] = *(ds_transmap + (colormap[val] << 8) + dest[3]);
-		xposition += xstep;
-		yposition += ystep;
+			val = source[((y * ds_flatwidth) + x)];
+			if (val != TRANSPARENTPIXEL)
+				*dest = *(ds_transmap + (colormap[val] << 8) + *dest);
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
+	}
+	else
+	{
+		while (count >= 8)
+		{
+			// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
+			// have the uber complicated math to calculate it now, so that was a memory write we didn't
+			// need!
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[0] = *(ds_transmap + (colormap[val] << 8) + dest[0]);
+			xposition += xstep;
+			yposition += ystep;
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[4] = *(ds_transmap + (colormap[val] << 8) + dest[4]);
-		xposition += xstep;
-		yposition += ystep;
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[1] = *(ds_transmap + (colormap[val] << 8) + dest[1]);
+			xposition += xstep;
+			yposition += ystep;
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[5] = *(ds_transmap + (colormap[val] << 8) + dest[5]);
-		xposition += xstep;
-		yposition += ystep;
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[2] = *(ds_transmap + (colormap[val] << 8) + dest[2]);
+			xposition += xstep;
+			yposition += ystep;
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[6] = *(ds_transmap + (colormap[val] << 8) + dest[6]);
-		xposition += xstep;
-		yposition += ystep;
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[3] = *(ds_transmap + (colormap[val] << 8) + dest[3]);
+			xposition += xstep;
+			yposition += ystep;
 
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			dest[7] = *(ds_transmap + (colormap[val] << 8) + dest[7]);
-		xposition += xstep;
-		yposition += ystep;
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[4] = *(ds_transmap + (colormap[val] << 8) + dest[4]);
+			xposition += xstep;
+			yposition += ystep;
 
-		dest += 8;
-		count -= 8;
-	}
-	while (count--)
-	{
-		val = source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)];
-		if (val != TRANSPARENTPIXEL)
-			*dest = *(ds_transmap + (colormap[val] << 8) + *dest);
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[5] = *(ds_transmap + (colormap[val] << 8) + dest[5]);
+			xposition += xstep;
+			yposition += ystep;
 
-		dest++;
-		xposition += xstep;
-		yposition += ystep;
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[6] = *(ds_transmap + (colormap[val] << 8) + dest[6]);
+			xposition += xstep;
+			yposition += ystep;
+
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				dest[7] = *(ds_transmap + (colormap[val] << 8) + dest[7]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest += 8;
+			count -= 8;
+		}
+		while (count-- && dest <= deststop)
+		{
+			val = source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)];
+			if (val != TRANSPARENTPIXEL)
+				*dest = *(ds_transmap + (colormap[val] << 8) + *dest);
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
 	}
 }
 
@@ -1283,15 +1579,20 @@ void R_DrawTranslucentSplat_8 (void)
 */
 void R_DrawTranslucentSpan_8 (void)
 {
-	UINT32 xposition;
-	UINT32 yposition;
-	UINT32 xstep, ystep;
+	fixed_t xposition;
+	fixed_t yposition;
+	fixed_t xstep, ystep;
 
 	UINT8 *source;
 	UINT8 *colormap;
 	UINT8 *dest;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
 
-	size_t count;
+	size_t count = (ds_x2 - ds_x1 + 1);
+	UINT32 val;
+
+	xposition = ds_xfrac; yposition = ds_yfrac;
+	xstep = ds_xstep; ystep = ds_ystep;
 
 	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
 	// can be used for the fraction part. This allows calculation of the memory address in the
@@ -1300,62 +1601,160 @@ void R_DrawTranslucentSpan_8 (void)
 	// bit per power of two (obviously)
 	// Ok, because I was able to eliminate the variable spot below, this function is now FASTER
 	// than the original span renderer. Whodathunkit?
-	xposition = ds_xfrac << nflatshiftup; yposition = ds_yfrac << nflatshiftup;
-	xstep = ds_xstep << nflatshiftup; ystep = ds_ystep << nflatshiftup;
+	if (ds_powersoftwo)
+	{
+		xposition <<= nflatshiftup; yposition <<= nflatshiftup;
+		xstep <<= nflatshiftup; ystep <<= nflatshiftup;
+	}
 
 	source = ds_source;
 	colormap = ds_colormap;
 	dest = ylookup[ds_y] + columnofs[ds_x1];
-	count = ds_x2 - ds_x1 + 1;
 
-	while (count >= 8)
+	if (!ds_powersoftwo)
 	{
-		// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
-		// have the uber complicated math to calculate it now, so that was a memory write we didn't
-		// need!
-		dest[0] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[0]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[1] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[1]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[2] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[2]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[3] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[3]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[4] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[4]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[5] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[5]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[6] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[6]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[7] = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + dest[7]);
-		xposition += xstep;
-		yposition += ystep;
-
-		dest += 8;
-		count -= 8;
+		while (count-- && dest <= deststop)
+		{
+			fixed_t x = (xposition >> FRACBITS);
+			fixed_t y = (yposition >> FRACBITS);
+
+			// Carefully align all of my Friends.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			val = ((y * ds_flatwidth) + x);
+			*dest = *(ds_transmap + (colormap[source[val]] << 8) + *dest);
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
 	}
-	while (count--)
+	else
 	{
-		*dest = *(ds_transmap + (colormap[source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)]] << 8) + *dest);
-		dest++;
-		xposition += xstep;
-		yposition += ystep;
+		while (count >= 8)
+		{
+			// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
+			// have the uber complicated math to calculate it now, so that was a memory write we didn't
+			// need!
+			dest[0] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[0]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[1] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[1]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[2] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[2]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[3] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[3]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[4] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[4]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[5] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[5]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[6] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[6]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest[7] = *(ds_transmap + (colormap[source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)]] << 8) + dest[7]);
+			xposition += xstep;
+			yposition += ystep;
+
+			dest += 8;
+			count -= 8;
+		}
+		while (count-- && dest <= deststop)
+		{
+			val = (((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift);
+			*dest = *(ds_transmap + (colormap[source[val]] << 8) + *dest);
+			dest++;
+			xposition += xstep;
+			yposition += ystep;
+		}
+	}
+}
+
+#ifndef NOWATER
+void R_DrawTranslucentWaterSpan_8(void)
+{
+	// Draws the water span ds_x1..ds_x2 of row ds_y: flat pixels are mixed through ds_transmap with screens[1].
+	fixed_t xposition, yposition;
+	fixed_t xstep, ystep;
+
+	UINT8 *source;
+	UINT8 *colormap;
+	UINT8 *dest;
+	UINT8 *dsrc;
+	const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height; // end of the vid.rowbytes * vid.height bytes at screens[0]
+
+	size_t count = (ds_x2 - ds_x1 + 1);
+
+	xposition = ds_xfrac; yposition = (ds_yfrac + ds_waterofs);
+	xstep = ds_xstep; ystep = ds_ystep;
+
+	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
+	// can be used for the fraction part. This allows calculation of the memory address in the
+	// texture with two shifts, an OR and one AND. (see below)
+	// for texture sizes > 64 the amount of precision we can allow will decrease, but only by one
+	// bit per power of two (obviously)
+	// That packing is only used when ds_powersoftwo is set; otherwise the coordinates stay
+	// unshifted and each pixel is wrapped into the flat with a modulo in the first loop below.
+	if (ds_powersoftwo)
+	{
+		xposition <<= nflatshiftup; yposition <<= nflatshiftup;
+		xstep <<= nflatshiftup; ystep <<= nflatshiftup;
+	}
+
+	source = ds_source;
+	colormap = ds_colormap;
+	dest = ylookup[ds_y] + columnofs[ds_x1];
+	dsrc = screens[1] + (ds_y+ds_bgofs)*vid.width + ds_x1; // background pixels come from row ds_y + ds_bgofs of screens[1]
+
+	if (!ds_powersoftwo)
+	{
+		while (count-- && dest < deststop) // strictly below deststop: a write at deststop is already out of range
+		{
+			fixed_t x = (xposition >> FRACBITS);
+			fixed_t y = (yposition >> FRACBITS);
+
+			// Bring negative coordinates into 1..size first, so the modulo below ends up in 0..size-1.
+			if (x < 0)
+				x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
+			if (y < 0)
+				y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
+
+			x %= ds_flatwidth;
+			y %= ds_flatheight;
+
+			*dest++ = colormap[*(ds_transmap + (source[((y * ds_flatwidth) + x)] << 8) + *dsrc++)];
+			xposition += xstep;
+			yposition += ystep;
+		}
+	}
+	else
+	{
+		while (count-- && dest < deststop) // strictly below deststop: a write at deststop is already out of range
+		{
+			*dest++ = colormap[*(ds_transmap + (source[(((UINT32)yposition >> nflatyshift) & nflatmask) | ((UINT32)xposition >> nflatxshift)] << 8) + *dsrc++)];
+			xposition += xstep;
+			yposition += ystep;
+		}
+	}
+}
+#endif
 
 /**	\brief The R_DrawFogSpan_8 function
 	Draws the actual span with fogging.
diff --git a/src/r_plane.c b/src/r_plane.c
index 2f6f97240a0418f3608d1901a8406b80716cf10f..de5bf9f003162f0928a044d9b85484fb17ea64ee 100644
--- a/src/r_plane.c
+++ b/src/r_plane.c
@@ -127,91 +127,13 @@ void R_InitPlanes(void)
 //  viewheight
 
 #ifndef NOWATER
-static INT32 bgofs;
+INT32 ds_bgofs;
+INT32 ds_waterofs;
+
 static INT32 wtofs=0;
-static INT32 waterofs;
 static boolean itswater;
 #endif
 
-#ifndef NOWATER
-static void R_DrawTranslucentWaterSpan_8(void)
-{
-	UINT32 xposition;
-	UINT32 yposition;
-	UINT32 xstep, ystep;
-
-	UINT8 *source;
-	UINT8 *colormap;
-	UINT8 *dest;
-	UINT8 *dsrc;
-
-	size_t count;
-
-	// SoM: we only need 6 bits for the integer part (0 thru 63) so the rest
-	// can be used for the fraction part. This allows calculation of the memory address in the
-	// texture with two shifts, an OR and one AND. (see below)
-	// for texture sizes > 64 the amount of precision we can allow will decrease, but only by one
-	// bit per power of two (obviously)
-	// Ok, because I was able to eliminate the variable spot below, this function is now FASTER
-	// than the original span renderer. Whodathunkit?
-	xposition = ds_xfrac << nflatshiftup; yposition = (ds_yfrac + waterofs) << nflatshiftup;
-	xstep = ds_xstep << nflatshiftup; ystep = ds_ystep << nflatshiftup;
-
-	source = ds_source;
-	colormap = ds_colormap;
-	dest = ylookup[ds_y] + columnofs[ds_x1];
-	dsrc = screens[1] + (ds_y+bgofs)*vid.width + ds_x1;
-	count = ds_x2 - ds_x1 + 1;
-
-	while (count >= 8)
-	{
-		// SoM: Why didn't I see this earlier? the spot variable is a waste now because we don't
-		// have the uber complicated math to calculate it now, so that was a memory write we didn't
-		// need!
-		dest[0] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[1] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[2] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[3] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[4] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[5] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[6] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest[7] = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-
-		dest += 8;
-		count -= 8;
-	}
-	while (count--)
-	{
-		*dest++ = colormap[*(ds_transmap + (source[((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)] << 8) + *dsrc++)];
-		xposition += xstep;
-		yposition += ystep;
-	}
-}
-#endif
-
 void R_MapPlane(INT32 y, INT32 x1, INT32 x2)
 {
 	angle_t angle, planecos, planesin;
@@ -258,17 +180,17 @@ void R_MapPlane(INT32 y, INT32 x1, INT32 x2)
 	{
 		const INT32 yay = (wtofs + (distance>>9) ) & 8191;
 		// ripples da water texture
-		bgofs = FixedDiv(FINESINE(yay), (1<<12) + (distance>>11))>>FRACBITS;
+		ds_bgofs = FixedDiv(FINESINE(yay), (1<<12) + (distance>>11))>>FRACBITS;
 		angle = (currentplane->viewangle + currentplane->plangle + xtoviewangle[x1])>>ANGLETOFINESHIFT;
 
 		angle = (angle + 2048) & 8191;  // 90 degrees
-		ds_xfrac += FixedMul(FINECOSINE(angle), (bgofs<<FRACBITS));
-		ds_yfrac += FixedMul(FINESINE(angle), (bgofs<<FRACBITS));
+		ds_xfrac += FixedMul(FINECOSINE(angle), (ds_bgofs<<FRACBITS));
+		ds_yfrac += FixedMul(FINESINE(angle), (ds_bgofs<<FRACBITS));
 
-		if (y+bgofs>=viewheight)
-			bgofs = viewheight-y-1;
-		if (y+bgofs<0)
-			bgofs = -y;
+		if (y+ds_bgofs>=viewheight)
+			ds_bgofs = viewheight-y-1;
+		if (y+ds_bgofs<0)
+			ds_bgofs = -y;
 	}
 #endif
 
@@ -680,7 +602,7 @@ void R_DrawPlanes(void)
 		}
 	}
 #ifndef NOWATER
-	waterofs = (leveltime & 1)*16384;
+	ds_waterofs = (leveltime & 1)*16384;
 	wtofs = leveltime * 140;
 #endif
 }
@@ -728,13 +650,156 @@ static void R_DrawSkyPlane(visplane_t *pl)
 	}
 }
 
+boolean R_CheckPowersOfTwo(void)
+{
+	return (ds_powersoftwo = ((ds_flatwidth == ds_flatheight) && !(ds_flatwidth & (ds_flatwidth - 1)) && !(ds_flatheight & (ds_flatheight - 1)))); // square flat whose sides have at most one bit set; the result is also stored in ds_powersoftwo
+}
+
+void R_CheckFlatLength(size_t size)
+{
+	// Sets up the power-of-two flat lookup (nflatshiftup, nflatxshift, nflatyshift,
+	// nflatmask) and ds_flatwidth/ds_flatheight from the byte length of a flat.
+	// The flat is taken to be a square whose side is 2^sidebits pixels; any length
+	// that is not listed below is handled as a 64x64 flat.
+
+	INT32 sidebits;
+
+	if (size == 4194304) // 2048x2048 lump
+		sidebits = 11;
+	else if (size == 1048576) // 1024x1024 lump
+		sidebits = 10;
+	else if (size == 262144) // 512x512 lump
+		sidebits = 9;
+	else if (size == 65536) // 256x256 lump
+		sidebits = 8;
+	else if (size == 16384) // 128x128 lump
+		sidebits = 7;
+	else if (size == 1024) // 32x32 lump
+		sidebits = 5;
+	else // 64x64 lump
+		sidebits = 6;
+
+	// The power-of-two span drawers shift the fixed-point texture coordinates left
+	// by nflatshiftup and then read the flat at
+	//   ((y >> nflatyshift) & nflatmask) | (x >> nflatxshift)
+	// Every value is a function of sidebits; the assignments below reproduce this table:
+	//
+	//   side  sidebits  nflatshiftup  nflatxshift  nflatyshift  nflatmask
+	//     32         5            11           27           22      0x3E0
+	//     64         6            10           26           20      0xFC0
+	//    128         7             9           25           18     0x3F80
+	//    256         8             8           24           16     0xFF00
+	//    512         9             7           23           14    0x3FE00
+	//   1024        10             6           22           12    0xFFC00
+	//   2048        11             5           21           10   0x3FF800
+
+	// Moves the sidebits-bit integer part of a coordinate to the top of its 32 bits
+	// (the formula assumes 16 fraction bits).
+	nflatshiftup = 16 - sidebits;
+
+	// After that shift, the top sidebits bits of x are the column index.
+	nflatxshift = 32 - sidebits;
+
+	// y is brought down sidebits bits less than x, which places the row index
+	// directly above the column bits (row * side).
+	nflatyshift = 32 - (sidebits * 2);
+
+	// Keeps only the row bits of the shifted y.
+	nflatmask = ((1 << sidebits) - 1) << sidebits;
+
+	// Side length in pixels.
+	ds_flatwidth = ds_flatheight = (1 << sidebits);
+}
+
+static UINT8 *R_GetPatchFlat(levelflat_t *levelflat, boolean leveltexture, boolean ispng)
+{
+	// Returns levelflat's picture as a flat, converting the wall texture, PNG or patch when needed.
+	// Also sets ds_flatwidth/ds_flatheight and adds the flat's offsets to xoffs/yoffs.
+	UINT8 *flat;
+	textureflat_t *texflat = (leveltexture ? &texflats[levelflat->texturenum] : NULL); // texflats is only indexed for wall textures
+	patch_t *patch = NULL;
+	boolean texturechanged = (leveltexture ? (levelflat->texturenum != levelflat->lasttexturenum) : false);
+
+	// Same wall texture as last time: reuse its converted flat, or force a conversion if there is none yet.
+	if (leveltexture && (!texturechanged))
+	{
+		if (texflat != NULL && texflat->flat)
+		{
+			flat = texflat->flat;
+			ds_flatwidth = texflat->width;
+			ds_flatheight = texflat->height;
+		}
+		else
+			texturechanged = true;
+	}
+
+	// If the texture changed, or the patch doesn't exist, convert either of them to a flat.
+	if (levelflat->flatpatch == NULL || texturechanged)
+	{
+		if (leveltexture)
+		{
+			texture_t *texture = textures[levelflat->texturenum];
+			texflat->width = ds_flatwidth = texture->width;
+			texflat->height = ds_flatheight = texture->height;
+
+			texflat->flat = Z_Malloc(ds_flatwidth * ds_flatheight, PU_LEVEL, NULL);
+			memset(texflat->flat, TRANSPARENTPIXEL, ds_flatwidth * ds_flatheight);
+			R_TextureToFlat(levelflat->texturenum, texflat->flat);
+			flat = texflat->flat;
+			levelflat->flatpatch = flat;
+			levelflat->width = ds_flatwidth;
+			levelflat->height = ds_flatheight;
+		}
+		else
+		{
+			patch = (patch_t *)ds_source;
+#ifndef NO_PNG_LUMPS
+			if (ispng)
+			{
+				levelflat->flatpatch = R_PNGToFlat(levelflat, ds_source, W_LumpLength(levelflat->lumpnum));
+				levelflat->topoffset = levelflat->leftoffset = 0;
+				ds_flatwidth = levelflat->width;
+				ds_flatheight = levelflat->height;
+			}
+			else
+#endif
+			{
+				levelflat->width = ds_flatwidth = SHORT(patch->width);
+				levelflat->height = ds_flatheight = SHORT(patch->height);
+
+				levelflat->topoffset = SHORT(patch->topoffset) * FRACUNIT; // read through SHORT() like width/height above
+				levelflat->leftoffset = SHORT(patch->leftoffset) * FRACUNIT;
+
+				levelflat->flatpatch = Z_Malloc(ds_flatwidth * ds_flatheight, PU_LEVEL, NULL);
+				memset(levelflat->flatpatch, TRANSPARENTPIXEL, ds_flatwidth * ds_flatheight);
+				R_PatchToFlat(patch, levelflat->flatpatch);
+			}
+			flat = levelflat->flatpatch;
+		}
+	}
+	else
+	{
+		flat = levelflat->flatpatch;
+		ds_flatwidth = levelflat->width;
+		ds_flatheight = levelflat->height;
+	}
+
+	xoffs += levelflat->leftoffset; // applied on every path, so the call that converts is offset like the cached ones
+	yoffs += levelflat->topoffset;
+
+	levelflat->lasttexturenum = levelflat->texturenum;
+	return flat;
+}
+
 void R_DrawSinglePlane(visplane_t *pl)
 {
+	UINT8 *flat;
 	INT32 light = 0;
 	INT32 x;
 	INT32 stop, angle;
 	size_t size;
 	ffloor_t *rover;
+	levelflat_t *levelflat;
 
 	if (!(pl->minx <= pl->maxx))
 		return;
@@ -874,63 +939,42 @@ void R_DrawSinglePlane(visplane_t *pl)
 		viewangle = pl->viewangle+pl->plangle;
 	}
 
-	currentplane = pl;
-
-	ds_source = (UINT8 *)
-		W_CacheLumpNum(levelflats[pl->picnum].lumpnum,
-			PU_STATIC); // Stay here until Z_ChangeTag
-
-	size = W_LumpLength(levelflats[pl->picnum].lumpnum);
+	xoffs = pl->xoffs;
+	yoffs = pl->yoffs;
+	planeheight = abs(pl->height - pl->viewz);
 
-	switch (size)
+	currentplane = pl;
+	levelflat = &levelflats[pl->picnum];
+	size = W_LumpLength(levelflat->lumpnum);
+	ds_source = (UINT8 *)W_CacheLumpNum(levelflat->lumpnum, PU_STATIC); // Stay here until Z_ChangeTag
+
+	// Check if the flat is actually a wall texture.
+	if (levelflat->texturenum != 0 && levelflat->texturenum != -1)
+		flat = R_GetPatchFlat(levelflat, true, false);
+#ifndef NO_PNG_LUMPS
+	// Maybe it's a PNG?!
+	else if (R_IsLumpPNG(ds_source, size))
+		flat = R_GetPatchFlat(levelflat, false, true);
+#endif
+	// Maybe it's just a patch, then?
+	else if (R_CheckIfPatch(levelflat->lumpnum))
+		flat = R_GetPatchFlat(levelflat, false, false);
+	// It's a raw flat.
+	else
 	{
-		case 4194304: // 2048x2048 lump
-			nflatmask = 0x3FF800;
-			nflatxshift = 21;
-			nflatyshift = 10;
-			nflatshiftup = 5;
-			break;
-		case 1048576: // 1024x1024 lump
-			nflatmask = 0xFFC00;
-			nflatxshift = 22;
-			nflatyshift = 12;
-			nflatshiftup = 6;
-			break;
-		case 262144:// 512x512 lump'
-			nflatmask = 0x3FE00;
-			nflatxshift = 23;
-			nflatyshift = 14;
-			nflatshiftup = 7;
-			break;
-		case 65536: // 256x256 lump
-			nflatmask = 0xFF00;
-			nflatxshift = 24;
-			nflatyshift = 16;
-			nflatshiftup = 8;
-			break;
-		case 16384: // 128x128 lump
-			nflatmask = 0x3F80;
-			nflatxshift = 25;
-			nflatyshift = 18;
-			nflatshiftup = 9;
-			break;
-		case 1024: // 32x32 lump
-			nflatmask = 0x3E0;
-			nflatxshift = 27;
-			nflatyshift = 22;
-			nflatshiftup = 11;
-			break;
-		default: // 64x64 lump
-			nflatmask = 0xFC0;
-			nflatxshift = 26;
-			nflatyshift = 20;
-			nflatshiftup = 10;
-			break;
+		R_CheckFlatLength(size);
+		flat = ds_source;
 	}
 
-	xoffs = pl->xoffs;
-	yoffs = pl->yoffs;
-	planeheight = abs(pl->height - pl->viewz);
+	Z_ChangeTag(ds_source, PU_CACHE);
+	ds_source = flat;
+
+	if (ds_source == NULL)
+		return;
+
+	// Check if the flat has dimensions that are powers-of-two numbers.
+	if (R_CheckPowersOfTwo())
+		R_CheckFlatLength(ds_flatwidth * ds_flatheight);
 
 	if (light >= LIGHTLEVELS)
 		light = LIGHTLEVELS-1;
@@ -945,61 +989,64 @@ void R_DrawSinglePlane(visplane_t *pl)
 		floatv3_t p, m, n;
 		float ang;
 		float vx, vy, vz;
+		float fudge = 0;
 		// compiler complains when P_GetZAt is used in FLOAT_TO_FIXED directly
 		// use this as a temp var to store P_GetZAt's return value each time
 		fixed_t temp;
-		// Okay, look, don't ask me why this works, but without this setup there's a disgusting-looking misalignment with the textures. -Red
-		const float fudge = ((1<<nflatshiftup)+1.0f)/(1<<nflatshiftup);
 
 		angle_t hack = (pl->plangle & (ANGLE_90-1));
 
 		yoffs *= 1;
 
-		if (hack)
-		{
-			/*
-			Essentially: We can't & the components along the regular axes when the plane is rotated.
-			This is because the distance on each regular axis in order to loop is different.
-			We rotate them, & the components, add them together, & them again, and then rotate them back.
-			These three seperate & operations are done per axis in order to prevent overflows.
-			toast 10/04/17
-			*/
-			const fixed_t cosinecomponent = FINECOSINE(hack>>ANGLETOFINESHIFT);
-			const fixed_t sinecomponent = FINESINE(hack>>ANGLETOFINESHIFT);
-
-			const fixed_t modmask = ((1 << (32-nflatshiftup)) - 1);
-
-			fixed_t ox = (FixedMul(pl->slope->o.x,cosinecomponent) & modmask) - (FixedMul(pl->slope->o.y,sinecomponent) & modmask);
-			fixed_t oy = (-FixedMul(pl->slope->o.x,sinecomponent) & modmask) - (FixedMul(pl->slope->o.y,cosinecomponent) & modmask);
-
-			temp = ox & modmask;
-			oy &= modmask;
-			ox = FixedMul(temp,cosinecomponent)+FixedMul(oy,-sinecomponent); // negative sine for opposite direction
-			oy = -FixedMul(temp,-sinecomponent)+FixedMul(oy,cosinecomponent);
-
-			temp = xoffs;
-			xoffs = (FixedMul(temp,cosinecomponent) & modmask) + (FixedMul(yoffs,sinecomponent) & modmask);
-			yoffs = (-FixedMul(temp,sinecomponent) & modmask) + (FixedMul(yoffs,cosinecomponent) & modmask);
-
-			temp = xoffs & modmask;
-			yoffs &= modmask;
-			xoffs = FixedMul(temp,cosinecomponent)+FixedMul(yoffs,-sinecomponent); // ditto
-			yoffs = -FixedMul(temp,-sinecomponent)+FixedMul(yoffs,cosinecomponent);
-
-			xoffs -= (pl->slope->o.x - ox);
-			yoffs += (pl->slope->o.y + oy);
-		}
-		else
+		if (ds_powersoftwo)
 		{
-			xoffs &= ((1 << (32-nflatshiftup))-1);
-			yoffs &= ((1 << (32-nflatshiftup))-1);
-			xoffs -= (pl->slope->o.x + (1 << (31-nflatshiftup))) & ~((1 << (32-nflatshiftup))-1);
-			yoffs += (pl->slope->o.y + (1 << (31-nflatshiftup))) & ~((1 << (32-nflatshiftup))-1);
+			// Okay, look, don't ask me why this works, but without this setup there's a disgusting-looking misalignment with the textures. -Red
+			fudge = ((1<<nflatshiftup)+1.0f)/(1<<nflatshiftup);
+			if (hack)
+			{
+				/*
+				Essentially: We can't & the components along the regular axes when the plane is rotated.
+				This is because the distance on each regular axis in order to loop is different.
+				We rotate them, & the components, add them together, & them again, and then rotate them back.
+				These three seperate & operations are done per axis in order to prevent overflows.
+				toast 10/04/17
+				*/
+				const fixed_t cosinecomponent = FINECOSINE(hack>>ANGLETOFINESHIFT);
+				const fixed_t sinecomponent = FINESINE(hack>>ANGLETOFINESHIFT);
+
+				const fixed_t modmask = ((1 << (32-nflatshiftup)) - 1);
+
+				fixed_t ox = (FixedMul(pl->slope->o.x,cosinecomponent) & modmask) - (FixedMul(pl->slope->o.y,sinecomponent) & modmask);
+				fixed_t oy = (-FixedMul(pl->slope->o.x,sinecomponent) & modmask) - (FixedMul(pl->slope->o.y,cosinecomponent) & modmask);
+
+				temp = ox & modmask;
+				oy &= modmask;
+				ox = FixedMul(temp,cosinecomponent)+FixedMul(oy,-sinecomponent); // negative sine for opposite direction
+				oy = -FixedMul(temp,-sinecomponent)+FixedMul(oy,cosinecomponent);
+
+				temp = xoffs;
+				xoffs = (FixedMul(temp,cosinecomponent) & modmask) + (FixedMul(yoffs,sinecomponent) & modmask);
+				yoffs = (-FixedMul(temp,sinecomponent) & modmask) + (FixedMul(yoffs,cosinecomponent) & modmask);
+
+				temp = xoffs & modmask;
+				yoffs &= modmask;
+				xoffs = FixedMul(temp,cosinecomponent)+FixedMul(yoffs,-sinecomponent); // ditto
+				yoffs = -FixedMul(temp,-sinecomponent)+FixedMul(yoffs,cosinecomponent);
+
+				xoffs -= (pl->slope->o.x - ox);
+				yoffs += (pl->slope->o.y + oy);
+			}
+			else
+			{
+				xoffs &= ((1 << (32-nflatshiftup))-1);
+				yoffs &= ((1 << (32-nflatshiftup))-1);
+				xoffs -= (pl->slope->o.x + (1 << (31-nflatshiftup))) & ~((1 << (32-nflatshiftup))-1);
+				yoffs += (pl->slope->o.y + (1 << (31-nflatshiftup))) & ~((1 << (32-nflatshiftup))-1);
+			}
+			xoffs = (fixed_t)(xoffs*fudge);
+			yoffs = (fixed_t)(yoffs/fudge);
 		}
 
-		xoffs = (fixed_t)(xoffs*fudge);
-		yoffs = (fixed_t)(yoffs/fudge);
-
 		vx = FIXED_TO_FLOAT(pl->viewx+xoffs);
 		vy = FIXED_TO_FLOAT(pl->viewy-yoffs);
 		vz = FIXED_TO_FLOAT(pl->viewz);
@@ -1033,13 +1080,16 @@ void R_DrawSinglePlane(visplane_t *pl)
 		temp = P_GetZAt(pl->slope, pl->viewx + FLOAT_TO_FIXED(cos(ang)), pl->viewy - FLOAT_TO_FIXED(sin(ang)));
 		n.y = FIXED_TO_FLOAT(temp) - zeroheight;
 
-		m.x /= fudge;
-		m.y /= fudge;
-		m.z /= fudge;
+		if (ds_powersoftwo)
+		{
+			m.x /= fudge;
+			m.y /= fudge;
+			m.z /= fudge;
 
-		n.x *= fudge;
-		n.y *= fudge;
-		n.z *= fudge;
+			n.x *= fudge;
+			n.y *= fudge;
+			n.z *= fudge;
+		}
 
 		// Eh. I tried making this stuff fixed-point and it exploded on me. Here's a macro for the only floating-point vector function I recall using.
 #define CROSS(d, v1, v2) \
@@ -1056,14 +1106,26 @@ void R_DrawSinglePlane(visplane_t *pl)
 		ds_sz.z *= focallengthf;
 
 		// Premultiply the texture vectors with the scale factors
+		if (ds_powersoftwo)
+		{
 #define SFMULT 65536.f*(1<<nflatshiftup)
-		ds_su.x *= SFMULT;
-		ds_su.y *= SFMULT;
-		ds_su.z *= SFMULT;
-		ds_sv.x *= SFMULT;
-		ds_sv.y *= SFMULT;
-		ds_sv.z *= SFMULT;
+			ds_su.x *= SFMULT;
+			ds_su.y *= SFMULT;
+			ds_su.z *= SFMULT;
+			ds_sv.x *= SFMULT;
+			ds_sv.y *= SFMULT;
+			ds_sv.z *= SFMULT;
 #undef SFMULT
+		}
+		else
+		{
+			ds_su.x *= 65536.f;
+			ds_su.y *= 65536.f;
+			ds_su.z *= 65536.f;
+			ds_sv.x *= 65536.f;
+			ds_sv.y *= 65536.f;
+			ds_sv.z *= 65536.f;
+		}
 
 		if (spanfunc == R_DrawTranslucentSpan_8)
 			spanfunc = R_DrawTiltedTranslucentSpan_8;
@@ -1170,8 +1232,6 @@ using the palette colors.
 		}
 	}
 #endif
-
-	Z_ChangeTag(ds_source, PU_CACHE);
 }
 
 void R_PlaneBounds(visplane_t *plane)
diff --git a/src/r_plane.h b/src/r_plane.h
index 238fde1827846f0be8278f6ee6a78e21aaf2c52b..11cbb56f1bcbffb94019630eaff2811f90c30e3a 100644
--- a/src/r_plane.h
+++ b/src/r_plane.h
@@ -96,6 +96,8 @@ void R_PlaneBounds(visplane_t *plane);
 
 // Draws a single visplane.
 void R_DrawSinglePlane(visplane_t *pl);
+void R_CheckFlatLength(size_t size);
+boolean R_CheckPowersOfTwo(void);
 
 typedef struct planemgr_s
 {
diff --git a/src/screen.c b/src/screen.c
index fc3f5b8e87f863112cdd88a5e7d0cd1f9faea95f..4bace52394bd14dcb56d20fc5d9744dbff1b26c1 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -133,7 +133,7 @@ void SCR_SetMode(void)
 				//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
 				walldrawerfunc = R_DrawWallColumn_8_MMX;
 				twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_MMX;
-				spanfunc = basespanfunc = R_DrawSpan_8_MMX;
+				//spanfunc = basespanfunc = R_DrawSpan_8_MMX;
 			}
 			else
 			{
diff --git a/src/w_wad.c b/src/w_wad.c
index d02ce9ce6061e43f3fa229e549c4821680d8090c..2fda8674c7fd15b0cdad41b57fccf237d0dbdd75 100644
--- a/src/w_wad.c
+++ b/src/w_wad.c
@@ -1182,8 +1182,6 @@ void zerr(int ret)
 }
 #endif
 
-#define NO_PNG_LUMPS
-
 #ifdef NO_PNG_LUMPS
 static void ErrorIfPNG(UINT8 *d, size_t s, char *f, char *l)
 {