diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c
index 77ef265065fa67aba58a871f93488a0b4365f425..2574bc011663919c5038b4b95c9d77a187c26ad6 100644
--- a/src/hardware/hw_cache.c
+++ b/src/hardware/hw_cache.c
@@ -694,7 +694,7 @@ static void HWR_GenerateTexture(INT32 texnum, GLTexture_t *grtex)
 		realpatch = W_CacheLumpNumPwad(patch->wad, patch->lump, PU_CACHE);
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 #endif
 		HWR_DrawTexturePatchInCache(&grtex->mipmap,
 		                     blockwidth, blockheight,
@@ -724,6 +724,13 @@ void HWR_MakePatch (const patch_t *patch, GLPatch_t *grPatch, GLMipmap_t *grMipm
 {
 	INT32 newwidth, newheight;
 
+#ifndef NO_PNG_LUMPS
+	// lump is a png so convert it (NOTE(review): the converted copy is not freed in the code visible here - confirm who releases it)
+	size_t len = W_LumpLengthPwad(grPatch->wadnum, grPatch->lumpnum);
+	if ((patch != NULL) && R_IsLumpPNG((UINT8 *)patch, len))
+		patch = R_PNGToPatch((UINT8 *)patch, len, NULL, true);
+#endif
+
 	// don't do it twice (like a cache)
 	if (grMipmap->width == 0)
 	{
@@ -926,7 +933,7 @@ static void HWR_LoadPatchFlat(GLMipmap_t *grMipmap, lumpnum_t flatlumpnum)
 
 #ifndef NO_PNG_LUMPS
 	if (R_IsLumpPNG((UINT8 *)patch, lumplength))
-		patch = R_PNGToPatch((UINT8 *)patch, lumplength);
+		patch = R_PNGToPatch((UINT8 *)patch, lumplength, NULL, false);
 #endif
 
 	grMipmap->width  = (UINT16)SHORT(patch->width);
diff --git a/src/r_data.c b/src/r_data.c
index 38dc28980571b74cd7596b9c15d4cefffc949323..a080fa4c3d6129b800b1a284717d1c5158fc5cb1 100644
--- a/src/r_data.c
+++ b/src/r_data.c
@@ -476,7 +476,7 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
 		{
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 			goto multipatch;
 		}
 #endif
@@ -569,7 +569,7 @@ static UINT8 *R_GenerateTexture(size_t texnum)
 		realpatch = W_CacheLumpNumPwad(wadnum, lumpnum, PU_CACHE);
 #ifndef NO_PNG_LUMPS
 		if (R_IsLumpPNG((UINT8 *)realpatch, lumplength))
-			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength);
+			realpatch = R_PNGToPatch((UINT8 *)realpatch, lumplength, NULL, false);
 #endif
 
 		x1 = patch->originx;
@@ -2625,7 +2625,7 @@ static void PNG_warn(png_structp PNG, png_const_charp pngtext)
 	CONS_Debug(DBG_RENDER, "libpng warning at %p: %s", PNG, pngtext);
 }
 
-static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, INT16 *topoffset, INT16 *leftoffset, size_t size)
 {
 	png_structp png_ptr;
 	png_infop png_info_ptr;
@@ -2683,8 +2683,7 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 
 	png_read_info(png_ptr, png_info_ptr);
 
-	png_get_IHDR(png_ptr, png_info_ptr, &width, &height, &bit_depth, &color_type,
-	 NULL, NULL, NULL);
+	png_get_IHDR(png_ptr, png_info_ptr, &width, &height, &bit_depth, &color_type, NULL, NULL, NULL);
 
 	if (bit_depth == 16)
 		png_set_strip_16(png_ptr);
@@ -2712,6 +2711,38 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 	for (y = 0; y < height; y++)
 		row_pointers[y] = (png_byte*)malloc(png_get_rowbytes(png_ptr, png_info_ptr));
 	png_read_image(png_ptr, row_pointers);
+
+	// Read grAb chunk
+	if (topoffset || leftoffset)
+	{
+		// After its 4-byte name, the chunk data is read as two big-endian
+		// 32-bit integers: the left offset first, then the top offset.
+		// Only positions whose 12 bytes all lie within the `size` bytes of
+		// `png` are examined, so no compare or read can run past the end.
+		// NOTE: this is a raw byte scan, not a walk over PNG chunk headers,
+		// so the bytes "grAb" inside another chunk's data would match as well.
+		size_t pos;
+
+		for (pos = 0; size >= 12 && pos <= size - 12; pos++)
+		{
+			const UINT8 *grab = png + pos;
+			if (memcmp(grab, "grAb", 4))
+				continue;
+			// Build each value byte by byte: no unaligned or type-punned
+			// INT32 loads, and no separate code path per host endianness.
+			#define PNG_BE32(p) (((UINT32)(p)[0]<<24)|((UINT32)(p)[1]<<16)|((UINT32)(p)[2]<<8)|(UINT32)(p)[3])
+			// read left offset
+			if (leftoffset != NULL)
+				*leftoffset = (INT16)(INT32)PNG_BE32(grab + 4);
+			// read top offset
+			if (topoffset != NULL)
+				*topoffset = (INT16)(INT32)PNG_BE32(grab + 8);
+			#undef PNG_BE32
+			break;
+		}
+	}
+
+	// bye
 	png_destroy_read_struct(&png_ptr, &png_info_ptr, NULL);
 
 	*w = (INT32)width;
@@ -2720,11 +2751,11 @@ static png_bytep *PNG_Read(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 }
 
 // Convert a PNG to a raw image.
-static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
+static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, INT16 *topoffset, INT16 *leftoffset, size_t size)
 {
 	UINT8 *flat;
 	png_uint_32 x, y;
-	png_bytep *row_pointers = PNG_Read(png, w, h, size);
+	png_bytep *row_pointers = PNG_Read(png, w, h, topoffset, leftoffset, size);
 	png_uint_32 width = *w, height = *h;
 
 	if (!row_pointers)
@@ -2751,15 +2782,16 @@ static UINT8 *PNG_RawConvert(UINT8 *png, UINT16 *w, UINT16 *h, size_t size)
 // Convert a PNG to a flat.
 UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size)
 {
-	return PNG_RawConvert(png, &levelflat->width, &levelflat->height, size);
+	return PNG_RawConvert(png, &levelflat->width, &levelflat->height, NULL, NULL, size);
 }
 
 // Convert a PNG to a patch.
 static unsigned char imgbuf[1<<26];
-patch_t *R_PNGToPatch(UINT8 *png, size_t size)
+patch_t *R_PNGToPatch(UINT8 *png, size_t size, size_t *destsize, boolean translucency)
 {
 	UINT16 width, height;
-	UINT8 *raw = PNG_RawConvert(png, &width, &height, size);
+	INT16 topoffset = 0, leftoffset = 0;
+	UINT8 *raw = PNG_RawConvert(png, &width, &height, &topoffset, &leftoffset, size);
 
 	UINT32 x, y;
 	UINT8 *img;
@@ -2776,9 +2808,8 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 	// Write image size and offset
 	WRITE16(imgptr, width);
 	WRITE16(imgptr, height);
-	// no offsets
-	WRITE16(imgptr, 0);
-	WRITE16(imgptr, 0);
+	WRITE16(imgptr, leftoffset);
+	WRITE16(imgptr, topoffset);
 
 	// Leave placeholder to column pointers
 	colpointers = imgptr;
@@ -2799,6 +2830,16 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 		for (y = 0; y < height; y++)
 		{
 			UINT8 paletteIndex = raw[((y * width) + x)];
+			boolean opaque = translucency ? (paletteIndex != TRANSPARENTPIXEL) : true;
+
+			// End span if we have a transparent pixel
+			if (!opaque)
+			{
+				if (startofspan)
+					WRITE8(imgptr, 0);
+				startofspan = NULL;
+				continue;
+			}
 
 			// Start new column if we need to
 			if (!startofspan || spanSize == 255)
@@ -2857,11 +2898,13 @@ patch_t *R_PNGToPatch(UINT8 *png, size_t size)
 	#undef WRITE32
 
 	size = imgptr-imgbuf;
-	img = malloc(size);
+	img = Z_Malloc(size, PU_STATIC, NULL);
 	memcpy(img, imgbuf, size);
 
 	Z_Free(raw);
 
+	if (destsize != NULL)
+		*destsize = size;
 	return (patch_t *)img;
 }
 
diff --git a/src/r_data.h b/src/r_data.h
index 91301100b3d7b366ea65869fc0be6085bce41ce3..38b7ba0ce4cd97b21ab412b29829a7a58a33ad03 100644
--- a/src/r_data.h
+++ b/src/r_data.h
@@ -168,7 +168,7 @@ void R_TextureToFlat(size_t tex, UINT8 *flat);
 boolean R_IsLumpPNG(UINT8 *d, size_t s);
 
 UINT8 *R_PNGToFlat(levelflat_t *levelflat, UINT8 *png, size_t size);
-patch_t *R_PNGToPatch(UINT8 *png, size_t size);
+patch_t *R_PNGToPatch(UINT8 *png, size_t size, size_t *destsize, boolean translucency); // destsize (may be NULL) receives the patch's byte size
 boolean R_PNGDimensions(UINT8 *png, INT16 *width, INT16 *height, size_t size);
 #endif
 
diff --git a/src/r_things.c b/src/r_things.c
index 92f2b946020bde6317faf9f52102cdab58f10d2d..5940e2189514057c71d65a28d82c886511b41934 100644
--- a/src/r_things.c
+++ b/src/r_things.c
@@ -254,6 +254,19 @@ static boolean R_AddSingleSpriteDef(const char *sprname, spritedef_t *spritedef,
 			// store sprite info in lookup tables
 			//FIXME : numspritelumps do not duplicate sprite replacements
 			W_ReadLumpHeaderPwad(wadnum, l, &patch, sizeof (patch_t), 0);
+#ifndef NO_PNG_LUMPS
+			{
+				void *lumpdata = W_CacheLumpNumPwad(wadnum, l, PU_STATIC);
+				size_t len = W_LumpLengthPwad(wadnum, l);
+				if (R_IsLumpPNG((UINT8 *)lumpdata, len)) // lump is a png so convert it
+				{
+					patch_t *png = R_PNGToPatch((UINT8 *)lumpdata, len, NULL, true);
+					M_Memcpy(&patch, png, sizeof(INT16)*4); // width, height, leftoffset, topoffset
+					Z_Free(png); // the converted copy was only needed for its header
+				}
+				Z_Free(lumpdata); // release the raw lump in both cases, not just the non-PNG one
+			}
+#endif
 			spritecachedinfo[numspritelumps].width = SHORT(patch.width)<<FRACBITS;
 			spritecachedinfo[numspritelumps].offset = SHORT(patch.leftoffset)<<FRACBITS;
 			spritecachedinfo[numspritelumps].topoffset = SHORT(patch.topoffset)<<FRACBITS;
@@ -701,7 +714,7 @@ static void R_DrawVisSprite(vissprite_t *vis)
 	INT32 texturecolumn;
 #endif
 	fixed_t frac;
-	patch_t *patch = W_CacheLumpNum(vis->patch, PU_CACHE);
+	patch_t *patch = W_CachePatchNum(vis->patch, PU_CACHE);
 	fixed_t this_scale = vis->mobj->scale;
 	INT32 x1, x2;
 	INT64 overflow_test;
@@ -870,7 +883,7 @@ static void R_DrawPrecipitationVisSprite(vissprite_t *vis)
 	INT64 overflow_test;
 
 	//Fab : R_InitSprites now sets a wad lump number
-	patch = W_CacheLumpNum(vis->patch, PU_CACHE);
+	patch = W_CachePatchNum(vis->patch, PU_CACHE);
 	if (!patch)
 		return;
 
diff --git a/src/w_wad.c b/src/w_wad.c
index 2fda8674c7fd15b0cdad41b57fccf237d0dbdd75..9688de3284674a2d3c504c9fa4dd7adeeec65104 100644
--- a/src/w_wad.c
+++ b/src/w_wad.c
@@ -789,6 +789,7 @@ UINT16 W_InitFile(const char *filename, boolean mainfile)
 	// set up caching
 	//
 	Z_Calloc(numlumps * sizeof (*wadfile->lumpcache), PU_STATIC, &wadfile->lumpcache);
+	Z_Calloc(numlumps * sizeof (*wadfile->patchcache), PU_STATIC, &wadfile->patchcache);
 
 #ifdef HWRENDER
 	// allocates GLPatch info structures and store them in a tree
@@ -1457,6 +1458,38 @@ boolean W_IsLumpCached(lumpnum_t lumpnum, void *ptr)
 	return W_IsLumpCachedPWAD(WADFILENUM(lumpnum),LUMPNUM(lumpnum), ptr);
 }
 
+//
+// W_IsPatchCached
+//
+// Reports whether a lump currently has an entry in its wad's patch cache.
+// With a non-NULL ptr the answer is true only when that exact pointer is
+// the cached one; with a NULL ptr any cached data at all counts.
+// An invalid wad/lump pair is never considered cached.
+//
+// no outside code uses the PWAD form, for now
+static inline boolean W_IsPatchCachedPWAD(UINT16 wad, UINT16 lump, void *ptr)
+{
+	void *cached;
+
+	// reject bad indices before touching the cache array
+	if (!TestValidLump(wad, lump))
+		return false;
+
+	cached = wadfiles[wad]->patchcache[lump];
+
+	// a specific pointer was asked about: it has to be the cached block
+	if (ptr != NULL)
+		return (ptr == cached) ? true : false;
+
+	// otherwise anything sitting in the cache slot will do
+	return (cached != NULL) ? true : false;
+}
+
+boolean W_IsPatchCached(lumpnum_t lumpnum, void *ptr)
+{
+	return W_IsPatchCachedPWAD(WADFILENUM(lumpnum),LUMPNUM(lumpnum), ptr); // passes the lumpnum's wad and lump parts to the PWAD form
+}
+
 // ==========================================================================
 // W_CacheLumpName
 // ==========================================================================
@@ -1480,18 +1513,53 @@ void *W_CacheLumpName(const char *name, INT32 tag)
 // Cache a patch into heap memory, convert the patch format as necessary
 //
 
-// Software-only compile cache the data without conversion
-#ifdef HWRENDER
-static inline void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
+void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
 {
+#ifdef HWRENDER
 	GLPatch_t *grPatch;
-
-	if (rendermode == render_soft || rendermode == render_none)
-		return W_CacheLumpNumPwad(wad, lump, tag);
+#endif
 
 	if (!TestValidLump(wad, lump))
 		return NULL;
 
+#ifdef HWRENDER
+	// Software rendering, and builds without HWRENDER where this check is compiled out, cache plain patch data (no GLPatch_t conversion)
+	if (rendermode == render_soft || rendermode == render_none)
+	{
+#endif
+		lumpcache_t *lumpcache = wadfiles[wad]->patchcache;
+		if (!lumpcache[lump])
+		{
+			size_t len = W_LumpLengthPwad(wad, lump);
+			void *ptr, *lumpdata;
+			// ptr lives in the patch cache slot; lumpdata only stages the raw lump
+			ptr = Z_Malloc(len, tag, &lumpcache[lump]);
+			lumpdata = Z_Malloc(len, PU_STATIC, NULL); // scratch copy, freed below
+			// read the lump in full
+			W_ReadLumpHeaderPwad(wad, lump, lumpdata, 0, 0);
+
+#ifndef NO_PNG_LUMPS
+			// lump is a png so convert it
+			if (R_IsLumpPNG((UINT8 *)lumpdata, len))
+			{
+				size_t newlen;
+				void *srcdata = R_PNGToPatch((UINT8 *)lumpdata, len, &newlen, true);
+				ptr = Z_Realloc(ptr, newlen, tag, &lumpcache[lump]);
+				M_Memcpy(ptr, srcdata, newlen);
+				Z_Free(srcdata);
+			}
+			else // just copy it into the patch cache
+#endif
+				M_Memcpy(ptr, lumpdata, len);
+			Z_Free(lumpdata); // the cache holds its own copy now
+		}
+		else
+			Z_ChangeTag(lumpcache[lump], tag);
+
+		return lumpcache[lump];
+#ifdef HWRENDER
+	}
+
 	grPatch = HWR_GetCachedGLPatchPwad(wad, lump);
 
 	if (grPatch->mipmap.grInfo.data)
@@ -1515,6 +1583,7 @@ static inline void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag)
 
 	// return GLPatch_t, which can be casted to (patch_t) with valid patch header info
 	return (void *)grPatch;
+#endif
 }
 
 void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag)
@@ -1522,8 +1591,6 @@ void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag)
 	return W_CachePatchNumPwad(WADFILENUM(lumpnum),LUMPNUM(lumpnum),tag);
 }
 
-#endif // HWRENDER
-
 void W_UnlockCachedPatch(void *patch)
 {
 	// The hardware code does its own memory management, as its patches
diff --git a/src/w_wad.h b/src/w_wad.h
index 6517388504d39b66945f284b9bd294aa1bcaae18..91d4e733e94ecdf1bfbeab2dd73edc71798bc0d1 100644
--- a/src/w_wad.h
+++ b/src/w_wad.h
@@ -102,6 +102,7 @@ typedef struct wadfile_s
 	restype_t type;
 	lumpinfo_t *lumpinfo;
 	lumpcache_t *lumpcache;
+	lumpcache_t *patchcache;
 #ifdef HWRENDER
 	aatree_t *hwrcache; // patches are cached in renderer's native format
 #endif
@@ -167,17 +168,13 @@ void *W_CacheLumpNum(lumpnum_t lump, INT32 tag);
 void *W_CacheLumpNumForce(lumpnum_t lumpnum, INT32 tag);
 
 boolean W_IsLumpCached(lumpnum_t lump, void *ptr);
+boolean W_IsPatchCached(lumpnum_t lump, void *ptr);
 
 void *W_CacheLumpName(const char *name, INT32 tag);
 void *W_CachePatchName(const char *name, INT32 tag);
 
-#ifdef HWRENDER
-//void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag); // return a patch_t
+void *W_CachePatchNumPwad(UINT16 wad, UINT16 lump, INT32 tag); // return a patch_t
 void *W_CachePatchNum(lumpnum_t lumpnum, INT32 tag); // return a patch_t
-#else
-//#define W_CachePatchNumPwad(wad, lump, tag) W_CacheLumpNumPwad(wad, lump, tag)
-#define W_CachePatchNum(lumpnum, tag) W_CacheLumpNum(lumpnum, tag)
-#endif
 
 void W_UnlockCachedPatch(void *patch);