diff --git a/libs/libpng-src/pngconf.h b/libs/libpng-src/pngconf.h
index 23f8313c3772d95b2261b0731b37d53e6efa8e38..5cb126197331b4d4dbc68c90980e686bb7fea730 100644
--- a/libs/libpng-src/pngconf.h
+++ b/libs/libpng-src/pngconf.h
@@ -62,7 +62,6 @@
 
 #define PNG_NO_READ_iTXt
 #define PNG_NO_READ_APNG
-#define PNG_NO_READ_UNKNOWN_CHUNKS
 #define PNG_NO_READ_USER_TRANSFORM
 #define PNG_READ_BGR_SUPPORTED
 #define PNG_NO_READ_SWAP_ALPHA
diff --git a/libs/libpng-src/projects/libpng32.a b/libs/libpng-src/projects/libpng32.a
index 74cb54e8c24dd3a924ff491086fb92d670298a83..5d2917502b9a01f0d75ca824d495d141b7ad49c7 100644
Binary files a/libs/libpng-src/projects/libpng32.a and b/libs/libpng-src/projects/libpng32.a differ
diff --git a/libs/libpng-src/projects/libpng64.a b/libs/libpng-src/projects/libpng64.a
index bba4c9121e706e450e764f7dc89ad809d3e38a14..cddf938ddc7f1d138e2cfc5f2bf48e05d2b29db0 100644
Binary files a/libs/libpng-src/projects/libpng64.a and b/libs/libpng-src/projects/libpng64.a differ
diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c
index 77ef265065fa67aba58a871f93488a0b4365f425..2574bc011663919c5038b4b95c9d77a187c26ad6 100644
--- a/src/hardware/hw_cache.c
+++ b/src/hardware/hw_cache.c
@@ -694,7 +694,7 @@ static void HWR_GenerateTexture(INT32 texnum, GLTexture_t *grtex)
 		realpatch = W_CacheLumpNumPwad(patch->wad, patch->lump, PU_CACHE);
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 #endif
 		HWR_DrawTexturePatchInCache(&grtex->mipmap,
 		                     blockwidth, blockheight,
@@ -724,6 +724,13 @@ void HWR_MakePatch (const patch_t *patch, GLPatch_t *grPatch, GLMipmap_t *grMipm
 {
 	INT32 newwidth, newheight;
 
+#ifndef NO_PNG_LUMPS
+	// lump is a png so convert it
+	size_t len = W_LumpLengthPwad(grPatch->wadnum, grPatch->lumpnum);
+	if ((patch != NULL) && R_IsLumpPNG((UINT8 *)patch, len))
+		patch = R_PNGToPatch((UINT8 *)patch, len, NULL, true);
+#endif
+
 	// don't do it twice (like a cache)
 	if (grMipmap->width == 0)
 	{
@@ -926,7 +933,7 @@ static void HWR_LoadPatchFlat(GLMipmap_t *grMipmap, lumpnum_t flatlumpnum)
 
 #ifndef NO_PNG_LUMPS
 	if (R_IsLumpPNG((UINT8 *)patch, lumplength))
-		patch = R_PNGToPatch((UINT8 *)patch, lumplength);
+		patch = R_PNGToPatch((UINT8 *)patch, lumplength, NULL, false);
 #endif
 
 	grMipmap->width  = (UINT16)SHORT(patch->width);
diff --git a/src/m_swap.h b/src/m_swap.h
index 2d42f6138efda6115d1d0527877ddb55a983d79a..3b50dc623737435bda8bd04eac6b50511e9cb626 100644
--- a/src/m_swap.h
+++ b/src/m_swap.h
@@ -14,29 +14,39 @@
 #ifndef __M_SWAP__
 #define __M_SWAP__
 
-#include "endian.h"
-
 // Endianess handling.
 // WAD files are stored little endian.
+#include "endian.h"
+
+// Little to big endian
 #ifdef SRB2_BIG_ENDIAN
 
-#define SHORT(x) ((INT16)(\
-(((UINT16)(x) & (UINT16)0x00ffU) << 8) \
-| \
-(((UINT16)(x) & (UINT16)0xff00U) >> 8))) \
+	#define SHORT(x) ((INT16)(\
+	(((UINT16)(x) & (UINT16)0x00ffU) << 8) \
+	| \
+	(((UINT16)(x) & (UINT16)0xff00U) >> 8))) \
 
-#define LONG(x) ((INT32)(\
-(((UINT32)(x) & (UINT32)0x000000ffUL) << 24) \
-| \
-(((UINT32)(x) & (UINT32)0x0000ff00UL) <<  8) \
-| \
-(((UINT32)(x) & (UINT32)0x00ff0000UL) >>  8) \
-| \
-(((UINT32)(x) & (UINT32)0xff000000UL) >> 24)))
+	#define LONG(x) ((INT32)(\
+	(((UINT32)(x) & (UINT32)0x000000ffUL) << 24) \
+	| \
+	(((UINT32)(x) & (UINT32)0x0000ff00UL) <<  8) \
+	| \
+	(((UINT32)(x) & (UINT32)0x00ff0000UL) >>  8) \
+	| \
+	(((UINT32)(x) & (UINT32)0xff000000UL) >> 24)))
+
+#else
+	#define SHORT(x) ((INT16)(x))
+	#define LONG(x)	((INT32)(x))
+#endif
 
+// Big to little endian: swap through unsigned types so sign bits never leak into the swapped bytes
+#ifdef SRB2_LITTLE_ENDIAN
+	#define BIGENDIAN_LONG(x) ((INT32)((((UINT32)(x)>>24)&0xffU)|(((UINT32)(x)>>8)&0xff00U)|(((UINT32)(x)<<8)&0xff0000U)|(((UINT32)(x)<<24)&0xff000000U)))
+	#define BIGENDIAN_SHORT(x) ((INT16)((((UINT16)(x)>>8)&0xffU)|(((UINT16)(x)<<8)&0xff00U)))
 #else
-#define SHORT(x) ((INT16)(x))
-#define LONG(x)	((INT32)(x))
+	#define BIGENDIAN_LONG(x) ((INT32)(x))
+	#define BIGENDIAN_SHORT(x) ((INT16)(x))
 #endif
 
 #endif
diff --git a/src/r_data.c b/src/r_data.c
index 38dc28980571b74cd7596b9c15d4cefffc949323..172a61da5863b9e76b71d7fc5c373f87b9543e39 100644
--- a/src/r_data.c
+++ b/src/r_data.c
@@ -476,7 +476,7 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
 		{
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 			goto multipatch;
 		}
 #endif
@@ -569,7 +569,7 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 		realpatch = W_CacheLumpNumPwad(wadnum, lumpnum, PU_CACHE);
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 #endif
 
 		x1 = patch->originx;
@@ -2603,17 +2603,40 @@ typedef struct {
 	png_bytep buffer;
 	png_uint_32 bufsize;
 	png_uint_32 current_pos;
-} png_ioread;
+} png_io_t;
 
 static void PNG_IOReader(png_structp png_ptr, png_bytep data, png_size_t length)
 {
-	png_ioread *f = png_get_io_ptr(png_ptr);
+	png_io_t *f = png_get_io_ptr(png_ptr); // in-memory source buffer and read position, as registered through png_set_read_fn
 	if (length > (f->bufsize - f->current_pos))
 		png_error(png_ptr, "PNG_IOReader: buffer overrun");
 	memcpy(data, f->buffer + f->current_pos, length);
 	f->current_pos += length;
 }
 
+typedef struct // private copy of one unknown PNG chunk, filled in by PNG_ChunkReader
+{
+	char name[4]; // four-byte chunk type copied from libpng's chunk record (not NUL-terminated)
+	void *data; // Z_Malloc'd copy of the chunk payload; NULL while nothing has been captured
+	size_t size; // payload length in bytes
+} png_chunk_t;
+
+static png_byte *chunkname = NULL; // chunk type PNG_ChunkReader compares against; PNG_Read points it at the wanted name
+static png_chunk_t chunk; // last captured chunk; PNG_Read zeroes it before reading and frees its data afterwards
+
+static int PNG_ChunkReader(png_structp png_ptr, png_unknown_chunkp chonk) // libpng user-chunk callback: 1 = chunk captured, 0 = left to libpng
+{
+	// Skip other chunk types, and payloads too short to hold the two 32-bit offsets PNG_Read reads back.
+	if (memcmp(chonk->name, chunkname, 4) || chonk->size < 2*sizeof(INT32))
+		return 0;
+	if (chunk.data) Z_Free(chunk.data); // repeated chunk: release the earlier copy instead of leaking it
+	memcpy(chunk.name, chonk->name, 4);
+	chunk.size = chonk->size;
+	chunk.data = Z_Malloc(chunk.size, PU_STATIC, NULL);
+	memcpy(chunk.data, chonk->data, chunk.size);
+	return 1;
+}
+
 static void PNG_error(png_structp PNG, png_const_charp pngtext)
 {
 	CONS_Debug(DBG_RENDER, "libpng error at %p: %s", PNG, pngtext);
@@ -2625,7 +2648,7 @@ static void PNG_warn(png_structp PNG, png_const_charp pngtext)
 	CONS_Debug(DBG_RENDER, "libpng warning at %p: %s", PNG, pngtext);
 }
 
-static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, INT16 *topoffset, INT16 *leftoffset, size_t size)
 {
 	png_structp png_ptr;
 	png_infop png_info_ptr;
@@ -2638,11 +2661,13 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 #endif
 #endif
 
-	png_ioread png_io;
+	png_io_t png_io;
 	png_bytep *row_pointers;
 
-	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL,
-		PNG_error, PNG_warn);
+	png_byte grAb_chunk[5] = {'g', 'r', 'A', 'b', (png_byte)'\0'};
+	png_voidp *user_chunk_ptr;
+
+	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, PNG_error, PNG_warn);
 	if (!png_ptr)
 	{
 		CONS_Debug(DBG_RENDER, "PNG_Load: Error on initialize libpng\n");
@@ -2677,14 +2702,19 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 	png_io.current_pos = 0;
 	png_set_read_fn(png_ptr, &png_io, PNG_IOReader);
 
+	memset(&chunk, 0x00, sizeof(png_chunk_t));
+	chunkname = grAb_chunk; // I want to read a grAb chunk
+
+	user_chunk_ptr = png_get_user_chunk_ptr(png_ptr);
+	png_set_read_user_chunk_fn(png_ptr, user_chunk_ptr, PNG_ChunkReader);
+	png_set_keep_unknown_chunks(png_ptr, 2, chunkname, 1);
+
 #ifdef PNG_SET_USER_LIMITS_SUPPORTED
 	png_set_user_limits(png_ptr, 2048, 2048);
 #endif
 
 	png_read_info(png_ptr, png_info_ptr);
-
-	png_get_IHDR(png_ptr, png_info_ptr, &width, &height, &bit_depth, &color_type,
-	 NULL, NULL, NULL);
+	png_get_IHDR(png_ptr, png_info_ptr, &width, &height, &bit_depth, &color_type, NULL, NULL, NULL);
 
 	if (bit_depth == 16)
 		png_set_strip_16(png_ptr);
@@ -2712,7 +2742,24 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 	for (y = 0; y < height; y++)
 		row_pointers[y] = (png_byte*)malloc(png_get_rowbytes(png_ptr, png_info_ptr));
 	png_read_image(png_ptr, row_pointers);
+
+	// Read grAB chunk
+	if ((topoffset || leftoffset) && (chunk.data != NULL))
+	{
+		INT32 *offsets = (INT32 *)chunk.data;
+		// read left offset
+		if (leftoffset != NULL)
+			*leftoffset = (INT16)BIGENDIAN_LONG(*offsets);
+		offsets++;
+		// read top offset
+		if (topoffset != NULL)
+			*topoffset = (INT16)BIGENDIAN_LONG(*offsets);
+	}
+
+	// bye
 	png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
+	if (chunk.data)
+		Z_Free(chunk.data);
 
 	*w = (INT32)width;
 	*h = (INT32)height;
@@ -2720,11 +2767,11 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 }
 
 // Convert a PNG to a raw image.
-static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, INT16 *topoffset, INT16 *leftoffset, size_t size)
 {
 	UINT8 *flat;
 	png_uint_32 x, y;
-	png_bytep *row_pointers = PNG_Read(png, w, h, size);
+	png_bytep *row_pointers = PNG_Read(png, w, h, topoffset, leftoffset, size);
 	png_uint_32 width = *w, height = *h;
 
 	if (!row_pointers)
@@ -2751,15 +2798,16 @@ static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 // Convert a PNG to a flat.
 UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size)
 {
-	return PNG_RawConvert(png, &levelflat->width, &levelflat->height, size);
+	return PNG_RawConvert(png, &levelflat->width, &levelflat->height, NULL, NULL, size); // NULL, NULL: the top/left offsets are not requested for flats
 }
 
 // Convert a PNG to a patch.
 static unsigned char imgbuf[1<<26];
-patch_t *R_PNGToPatch(UINT8 *png, size_t size)
+patch_t *R_PNGToPatch(UINT8 *png, size_t size, size_t *destsize, boolean transparency)
 {
 	UINT16 width, height;
-	UINT8 *raw = PNG_RawConvert(png, &width, &height, size);
+	INT16 topoffset = 0, leftoffset = 0;
+	UINT8 *raw = PNG_RawConvert(png, &width, &height, &topoffset, &leftoffset, size);
 
 	UINT32 x, y;
 	UINT8 *img;
@@ -2776,9 +2824,8 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 	// Write image size and offset
 	WRITE16(imgptr, width);
 	WRITE16(imgptr, height);
-	// no offsets
-	WRITE16(imgptr, 0);
-	WRITE16(imgptr, 0);
+	WRITE16(imgptr, leftoffset);
+	WRITE16(imgptr, topoffset);
 
 	// Leave placeholder to column pointers
 	colpointers = imgptr;
@@ -2799,6 +2846,16 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 		for (y = 0; y < height; y++)
 		{
 			UINT8 paletteIndex = raw[((y * width) + x)];
+			boolean opaque = transparency ? (paletteIndex != TRANSPARENTPIXEL) : true;
+
+			// End span if we have a transparent pixel
+			if (!opaque)
+			{
+				if (startofspan)
+					WRITE8(imgptr, 0);
+				startofspan = NULL;
+				continue;
+			}
 
 			// Start new column if we need to
 			if (!startofspan || spanSize == 255)
@@ -2857,11 +2914,13 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 	#undef WRITE32
 
 	size = imgptr-imgbuf;
-	img = malloc(size);
+	img = Z_Malloc(size, PU_STATIC, NULL);
 	memcpy(img, imgbuf, size);
 
 	Z_Free(raw);
 
+	if (destsize != NULL)
+		*destsize = size;
 	return (patch_t *)img;
 }
 
@@ -2877,7 +2936,7 @@ boolean R_PNGDimensions(UINT8 *png, INT16 *width, INT16 *height, size_t size)
 #endif
 #endif
 
-	png_ioread png_io;
+	png_io_t png_io;
 
 	png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL,
 		PNG_error, PNG_warn);
diff --git a/src/r_data.h b/src/r_data.h
index 91301100b3d7b366ea65869fc0be6085bce41ce3..38b7ba0ce4cd97b21ab412b29829a7a58a33ad03 100644
--- a/src/r_data.h
+++ b/src/r_data.h
@@ -168,7 +168,7 @@ void R_TextureToFlat(size_t tex, UINT8 *flat);
 boolean R_IsLumpPNG(UINT8 *d, size_t s);
 
 UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size);
-patch_t *R_PNGToPatch(UINT8 *png, size_t size);
+patch_t *R_PNGToPatch(UINT8 *png, size_t size, size_t *destsize, boolean transparency);
 boolean R_PNGDimensions(UINT8 *png, INT16 *width, INT16 *height, size_t size);
 #endif
 
diff --git a/src/r_plane.c b/src/r_plane.c
index 51a69336e2466c2123e52cfdf4b3ebdfc470cb0b..db5fb0f249d690f70dacabaaca4e87767f7576d4 100644
--- a/src/r_plane.c
+++ b/src/r_plane.c
@@ -652,12 +652,18 @@ static void R_DrawSkyPlane(visplane_t *pl)
 
 boolean R_CheckPowersOfTwo(void)
 {
-	if (ds_flatwidth & (ds_flatwidth - 1))
-		ds_powersoftwo = false;
-	else if (ds_flatheight & (ds_flatheight - 1))
-		ds_powersoftwo = false;
-	else if (ds_flatwidth == ds_flatheight)
+	boolean wpow2 = (!(ds_flatwidth & (ds_flatwidth - 1))); // true when the width has at most one bit set
+	boolean hpow2 = (!(ds_flatheight & (ds_flatheight - 1))); // same bit test for the height
+
+	// Assume the flat does not qualify until the checks below pass.
+	ds_powersoftwo = false;
+
+	// It qualifies only when both dimensions pass the bit test
+	// and the flat is square (width equals height).
+	if ((ds_flatwidth == ds_flatheight) && (wpow2 && hpow2))
 		ds_powersoftwo = true;
+
+	// The result is stored in ds_powersoftwo and also returned.
 	return ds_powersoftwo;
 }
 
@@ -806,6 +812,7 @@ void R_DrawSinglePlane(visplane_t *pl)
 	size_t size;
 	ffloor_t *rover;
 	levelflat_t *levelflat;
+	boolean rawflat = false;
 
 	if (!(pl->minx <= pl->maxx))
 		return;
@@ -968,6 +975,7 @@ void R_DrawSinglePlane(visplane_t *pl)
 	// It's a raw flat.
 	else
 	{
+		rawflat = true;
 		R_CheckFlatLength(size);
 		flat = ds_source;
 	}
@@ -978,8 +986,11 @@ void R_DrawSinglePlane(visplane_t *pl)
 	if (ds_source == NULL)
 		return;
 
-	// Check if the flat has dimensions that are powers-of-two numbers.
-	if (R_CheckPowersOfTwo())
+	// Raw flats always have dimensions that are powers-of-two numbers.
+	if (rawflat)
+		ds_powersoftwo = true;
+	// Otherwise, check if this texture or patch has such dimensions.
+	else if (R_CheckPowersOfTwo())
 	{
 		R_CheckFlatLength(ds_flatwidth * ds_flatheight);
 		if (spanfunc == basespanfunc)
@@ -1116,26 +1127,27 @@ void R_DrawSinglePlane(visplane_t *pl)
 		ds_sz.z *= focallengthf;
 
 		// Premultiply the texture vectors with the scale factors
+#define SFMULT 65536.f
 		if (ds_powersoftwo)
 		{
-#define SFMULT 65536.f*(1<<nflatshiftup)
+			ds_su.x *= (SFMULT * (1<<nflatshiftup));
+			ds_su.y *= (SFMULT * (1<<nflatshiftup));
+			ds_su.z *= (SFMULT * (1<<nflatshiftup));
+			ds_sv.x *= (SFMULT * (1<<nflatshiftup));
+			ds_sv.y *= (SFMULT * (1<<nflatshiftup));
+			ds_sv.z *= (SFMULT * (1<<nflatshiftup));
+		}
+		else
+		{
+			// I'm essentially multiplying the vectors by FRACUNIT...
 			ds_su.x *= SFMULT;
 			ds_su.y *= SFMULT;
 			ds_su.z *= SFMULT;
 			ds_sv.x *= SFMULT;
 			ds_sv.y *= SFMULT;
 			ds_sv.z *= SFMULT;
-#undef SFMULT
-		}
-		else
-		{
-			ds_su.x *= 65536.f;
-			ds_su.y *= 65536.f;
-			ds_su.z *= 65536.f;
-			ds_sv.x *= 65536.f;
-			ds_sv.y *= 65536.f;
-			ds_sv.z *= 65536.f;
 		}
+#undef SFMULT
 
 		if (spanfunc == R_DrawTranslucentSpan_8)
 			spanfunc = R_DrawTiltedTranslucentSpan_8;
diff --git a/src/r_things.c b/src/r_things.c
index 92f2b946020bde6317faf9f52102cdab58f10d2d..5940e2189514057c71d65a28d82c886511b41934 100644
--- a/src/r_things.c
+++ b/src/r_things.c
@@ -254,6 +254,19 @@ static boolean R_AddSingleSpriteDef(const char *sprname, spritedef_t *spritedef,
 			// store sprite info in lookup tables
 			//FIXME : numspritelumps do not duplicate sprite replacements
 			W_ReadLumpHeaderPwad(wadnum, l, &patch, sizeof (patch_t), 0);
+#ifndef NO_PNG_LUMPS
+			{
+				patch_t *png = W_CacheLumpNumPwad(wadnum, l, PU_STATIC);
+				size_t len = W_LumpLengthPwad(wadnum, l);
+				if (R_IsLumpPNG((UINT8 *)png, len)) // lump is a png so convert it
+				{
+					patch_t *converted = R_PNGToPatch((UINT8 *)png, len, NULL, true); // separate pointer, so the raw lump is not lost
+					M_Memcpy(&patch, converted, sizeof(INT16)*4); // only the four INT16 header fields are needed here
+					Z_Free(converted);
+				}
+				Z_Free(png); // release the raw lump on both paths, not just the non-PNG one
+			}
+#endif
 			spritecachedinfo[numspritelumps].width = SHORT(patch.width)<<FRACBITS;
 			spritecachedinfo[numspritelumps].offset = SHORT(patch.leftoffset)<<FRACBITS;
 			spritecachedinfo[numspritelumps].topoffset = SHORT(patch.topoffset)<<FRACBITS;
@@ -701,7 +714,7 @@ static void R_DrawVisSprite(vissprite_t *vis)
 	INT32 texturecolumn;
 #endif
 	fixed_t frac;
-	patch_t *patch = W_CacheLumpNum(vis->patch, PU_CACHE);
+	patch_t *patch = W_CachePatchNum(vis->patch, PU_CACHE);
 	fixed_t this_scale = vis->mobj->scale;
 	INT32 x1, x2;
 	INT64 overflow_test;
@@ -870,7 +883,7 @@ static void R_DrawPrecipitationVisSprite(vissprite_t *vis)
 	INT64 overflow_test;
 
 	//Fab : R_InitSprites now sets a wad lump number
-	patch = W_CacheLumpNum(vis->patch, PU_CACHE);
+	patch = W_CachePatchNum(vis->patch, PU_CACHE);
 	if (!patch)
 		return;
 
diff --git a/src/w_wad.c b/src/w_wad.c
index 2fda8674c7fd15b0cdad41b57fccf237d0dbdd75..9688de3284674a2d3c504c9fa4dd7adeeec65104 100644
--- a/src/w_wad.c
+++ b/src/w_wad.c
@@ -789,6 +789,7 @@ UINT16 W_InitFile(const char *filename, boolean mainfile)
 	// set up caching
 	//
 	Z_Calloc(numlumps * sizeof (*wadfile->lumpcache), PU_STATIC, &wadfile->lumpcache);
+	Z_Calloc(numlumps * sizeof (*wadfile->patchcache), PU_STATIC, &wadfile->patchcache);
 
 #ifdef HWRENDER
 	// allocates GLPatch info structures and store them in a tree
@@ -1457,6 +1458,38 @@ boolean W_IsLumpCached(lumpnum_t lumpnum, void *ptr)
 	return W_IsLumpCachedPWAD(WADFILENUM(lumpnum),LUMPNUM(lumpnum), ptr);
 }
 
+//
+// W_IsPatchCached
+//
+// Tells whether a lump currently has an entry in its WAD's patch cache.
+// Given a non-NULL ptr, only that exact pointer counts as cached;
+// given NULL, any cached entry counts. Invalid lumps are never cached.
+// no outside code uses the PWAD form, for now
+static inline boolean W_IsPatchCachedPWAD(UINT16 wad, UINT16 lump, void *ptr)
+{
+	void *cached;
+
+	if (!TestValidLump(wad, lump))
+		return false;
+
+	cached = wadfiles[wad]->patchcache[lump];
+
+	// asked about a specific pointer: it has to be the cached one
+	if (ptr != NULL)
+		return (ptr == cached) ? true : false;
+
+	// otherwise any cached entry is good enough
+	if (cached != NULL)
+		return true;
+
+	return false;
+}
+
+boolean W_IsPatchCached(lumpnum_t lumpnum, void *ptr) // lumpnum_t front end for W_IsPatchCachedPWAD
+{
+	return W_IsPatchCachedPWAD(WADFILENUM(lumpnum),LUMPNUM(lumpnum), ptr); // WADFILENUM/LUMPNUM supply the wad and lump arguments
+}
+
 // ==========================================================================
 // W_CacheLumpName
 // ==========================================================================
@@ -1480,18 +1513,53 @@ void *W_CacheLumpName(const char *name, INT32 tag)
 // Cache a patch into heap memory, convert the patch format as necessary
 //
 
-// Software-only compile cache the data without conversion
-#ifdef HWRENDER
-static inline void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
+void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
 {
+#ifdef HWRENDER
 	GLPatch_t *grPatch;
-
-	if (rendermode == render_soft || rendermode == render_none)
-		return W_CacheLumpNumPwad(wad, lump, tag);
+#endif
 
 	if (!TestValidLump(wad, lump))
 		return NULL;
 
+#ifdef HWRENDER
+	// Software-only compile cache the data without conversion
+	if (rendermode == render_soft || rendermode == render_none)
+	{
+#endif
+		lumpcache_t *lumpcache = wadfiles[wad]->patchcache; // this WAD's cache of patch-format lumps
+		if (!lumpcache[lump])
+		{
+			size_t len = W_LumpLengthPwad(wad, lump);
+			void *ptr, *lumpdata, *srcdata = NULL;
+
+			ptr = Z_Malloc(len, tag, &lumpcache[lump]);
+			lumpdata = Z_Malloc(len, PU_STATIC, NULL); // ownerless scratch copy of the raw lump, freed below
+
+			// read the lump in full
+			W_ReadLumpHeaderPwad(wad, lump, lumpdata, 0, 0);
+#ifndef NO_PNG_LUMPS
+			// lump is a png so convert it
+			if (R_IsLumpPNG((UINT8 *)lumpdata, len))
+			{
+				size_t newlen;
+				srcdata = R_PNGToPatch((UINT8 *)lumpdata, len, &newlen, true);
+				ptr = Z_Realloc(ptr, newlen, tag, &lumpcache[lump]); // resize the cache entry to the converted patch's length
+				M_Memcpy(ptr, srcdata, newlen);
+				Z_Free(srcdata);
+			}
+			else // just copy it into the patch cache
+#endif
+				M_Memcpy(ptr, lumpdata, len);
+			Z_Free(lumpdata); // the cache entry holds its own copy now, so the scratch buffer must not leak
+		}
+		else
+			Z_ChangeTag(lumpcache[lump], tag); // already cached: only the tag is updated
+
+		return lumpcache[lump];
+#ifdef HWRENDER
+	}
+
 	grPatch = HWR_GetCachedGLPatchPwad(wad, lump);
 
 	if (grPatch->mipmap.grInfo.data)
@@ -1515,6 +1583,7 @@ static inline void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
 
 	// return GLPatch_t, which can be casted to (patch_t) with valid patch header info
 	return (void *)grPatch;
+#endif
 }
 
 void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag)
@@ -1522,8 +1591,6 @@ void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag)
 	return W_CachePatchNumPwad(WADFILENUM(lumpnum),LUMPNUM(lumpnum),tag);
 }
 
-#endif // HWRENDER
-
 void W_UnlockCachedPatch(void *patch)
 {
 	// The hardware code does its own memory management, as its patches
diff --git a/src/w_wad.h b/src/w_wad.h
index 6517388504d39b66945f284b9bd294aa1bcaae18..91d4e733e94ecdf1bfbeab2dd73edc71798bc0d1 100644
--- a/src/w_wad.h
+++ b/src/w_wad.h
@@ -102,6 +102,7 @@ typedef struct wadfile_s
 	restype_t type;
 	lumpinfo_t *lumpinfo;
 	lumpcache_t *lumpcache;
+	lumpcache_t *patchcache;
 #ifdef HWRENDER
 	aatree_t *hwrcache; // patches are cached in renderer's native format
 #endif
@@ -167,17 +168,13 @@ void *W_CacheLumpNum(lumpnum_t lump, INT32 tag);
 void *W_CacheLumpNumForce(lumpnum_t lumpnum, INT32 tag);
 
 boolean W_IsLumpCached(lumpnum_t lump, void *ptr);
+boolean W_IsPatchCached(lumpnum_t lump, void *ptr);
 
 void *W_CacheLumpName(const char *name, INT32 tag);
 void *W_CachePatchName(const char *name, INT32 tag);
 
-#ifdef HWRENDER
-//void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag); // return a patch_t
+void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag); // return a patch_t
 void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag); // return a patch_t
-#else
-//#define W_CachePatchNumPwad(wad, lump, tag) W_CacheLumpNumPwad(wad, lump, tag)
-#define W_CachePatchNum(lumpnum, tag) W_CacheLumpNum(lumpnum, tag)
-#endif
 
 void W_UnlockCachedPatch(void *patch);