diff --git a/src/f_wipe.c b/src/f_wipe.c
index 4b4ecd7e3848990353d1fa6526d47023fe2c1cb3..69e956216225003bdaaaf94920a28e43d5c5354e 100644
--- a/src/f_wipe.c
+++ b/src/f_wipe.c
@@ -172,15 +172,6 @@ static fademask_t *F_GetFadeMask(UINT8 masknum, UINT8 scrnnum) {
   */
 static void F_DoWipe(fademask_t *fademask)
 {
-#ifdef HWRENDER
-	/// \todo Mask wipes for OpenGL
-	if(rendermode != render_soft)
-	{
-		HWR_DoScreenWipe();
-		return;
-	}
-#endif
-
 	// Software mask wipe -- optimized; though it might not look like it!
 	// Okay, to save you wondering *how* this is more optimized than the simpler
 	// version that came before it...
@@ -344,6 +335,11 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu)
 			I_Sleep();
 		lastwipetic = nowtime;
 
+#ifdef HWRENDER
+		if (rendermode == render_opengl)
+			HWR_DoWipe(wipetype, wipeframe-1); // send in the wipe type and wipeframe because we need to cache the graphic
+		else
+#endif
 		F_DoWipe(fmask);
 		I_OsPolling();
 		I_UpdateNoBlit();
diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c
index 3899b75b86fad79548bcf7f4e182fe71bea029cf..4b2ff9f22d5ff3268f7e97815339afbcf77c1a8b 100644
--- a/src/hardware/hw_cache.c
+++ b/src/hardware/hw_cache.c
@@ -1002,4 +1002,108 @@ GLPatch_t *HWR_GetCachedGLPatch(lumpnum_t lumpnum)
 	return HWR_GetCachedGLPatchPwad(WADFILENUM(lumpnum),LUMPNUM(lumpnum));
 }
 
+// Scales the 1bpp fade mask lump into the mipmap's block; needed because fade masks aren't powers of 2.
+// Each texel written is the red level of the colour V_GetColor returns for the mask pixel, used as alpha.
+static void HWR_DrawFadeMaskInCache(GLMipmap_t *mipmap, INT32 pblockwidth, INT32 pblockheight,
+	lumpnum_t fademasklumpnum, UINT16 fmwidth, UINT16 fmheight)
+{
+	INT32 i,j;
+	fixed_t posx, posy, stepx, stepy;
+	UINT8 *block = mipmap->grInfo.data; // places the data directly into here, it already has the space allocated from HWR_ResizeBlock
+	UINT8 *flat;
+	UINT8 *dest, *src, texel;
+	RGBA_t col;
+
+	// Place the flats data into flat
+	W_ReadLump(fademasklumpnum, Z_Malloc(W_LumpLength(fademasklumpnum),
+		PU_HWRCACHE, &flat));
+
+	// fmwidth/fmheight are native values chosen by the caller, not raw lump data, so they must not go through SHORT()
+	stepy = ((INT32)fmheight<<FRACBITS)/pblockheight;
+	stepx = ((INT32)fmwidth<<FRACBITS)/pblockwidth;
+	posy = 0;
+	for (j = 0; j < pblockheight; j++)
+	{
+		posx = 0;
+		dest = &block[j*blockwidth]; // 1bpp
+		src = &flat[(posy>>FRACBITS)*fmwidth];
+		for (i = 0; i < pblockwidth;i++)
+		{
+			// fademask bpp is always 1, and is used just for alpha
+			texel = src[(posx)>>FRACBITS];
+			col = V_GetColor(texel);
+			*dest++ = col.s.red; // take the red level of the colour and use it for alpha, as fademasks do
+			posx += stepx;
+		}
+		posy += stepy;
+	}
+
+	Z_Free(flat);
+}
+
+// Sets grMipmap up as an alpha-only texture and fills its block from the fade mask lump.
+// The mask's dimensions are inferred from the lump length; HWR_DoWipe rejects other lengths before asking for the mask.
+static void HWR_CacheFadeMask(GLMipmap_t *grMipmap, lumpnum_t fademasklumpnum)
+{
+	size_t size;
+	UINT16 fmheight = 0, fmwidth = 0;
+
+	// setup the texture info
+	grMipmap->grInfo.format = GR_TEXFMT_ALPHA_8; // put the correct alpha levels straight in so I don't need to convert it later
+	grMipmap->flags = 0;
+
+	size = W_LumpLength(fademasklumpnum);
+
+	switch (size)
+	{
+		// None of these are powers of 2, so I'll need to do what is done for textures and make them powers of 2 before they can be used
+		case 256000: // 640x400
+			fmwidth = 640;
+			fmheight = 400;
+			break;
+		case 64000: // 320x200
+			fmwidth = 320;
+			fmheight = 200;
+			break;
+		case 16000: // 160x100
+			fmwidth = 160;
+			fmheight = 100;
+			break;
+		case 4000: // 80x50 (minimum)
+			fmwidth = 80;
+			fmheight = 50;
+			break;
+		default: // Bad lump
+			CONS_Alert(CONS_WARNING, "Fade mask lump of incorrect size, ignored\n"); // I should avoid this by checking the lumpnum in HWR_RunWipe
+			break;
+	}
+
+	// Thankfully, this will still work for this scenario
+	HWR_ResizeBlock(fmwidth, fmheight, &grMipmap->grInfo);
+
+	grMipmap->width  = blockwidth;
+	grMipmap->height = blockheight;
+
+	MakeBlock(grMipmap); // return value not kept: HWR_DrawFadeMaskInCache reads grMipmap->grInfo.data itself
+
+	// I DO need to convert this because it isn't power of 2 and we need the alpha
+	HWR_DrawFadeMaskInCache(grMipmap, blockwidth, blockheight, fademasklumpnum, fmwidth, fmheight);
+}
+
+// Makes the fade mask stored in the given lump the current texture.
+// The mask's block is built first when it is neither flagged as downloaded nor present in system memory.
+void HWR_GetFadeMask(lumpnum_t fademasklumpnum)
+{
+	GLMipmap_t *maskmip = &HWR_GetCachedGLPatch(fademasklumpnum)->mipmap;
+
+	if (!(maskmip->downloaded || maskmip->grInfo.data))
+		HWR_CacheFadeMask(maskmip, fademasklumpnum);
+
+	HWD.pfnSetTexture(maskmip);
+
+	// After the texture has been set, the system-memory copy is allowed to be purged.
+	Z_ChangeTag(maskmip->grInfo.data, PU_HWRCACHE_UNLOCKED);
+}
+
+
 #endif //HWRENDER
diff --git a/src/hardware/hw_glob.h b/src/hardware/hw_glob.h
index ee84e8b1081d091674b345d3ff8fbe9a550ed0a2..88786bc112ae459eb8826fc14ab551d8f479ce98 100644
--- a/src/hardware/hw_glob.h
+++ b/src/hardware/hw_glob.h
@@ -106,6 +106,7 @@ GLPatch_t *HWR_GetPic(lumpnum_t lumpnum);
 void HWR_SetPalette(RGBA_t *palette);
 GLPatch_t *HWR_GetCachedGLPatchPwad(UINT16 wad, UINT16 lump);
 GLPatch_t *HWR_GetCachedGLPatch(lumpnum_t lumpnum);
+void HWR_GetFadeMask(lumpnum_t fademasklumpnum);
 
 // --------
 // hw_draw.c
diff --git a/src/hardware/hw_main.c b/src/hardware/hw_main.c
index 7e64243a48aabbe31863f93750349af9d9d3a21b..e832457c04eec4345d258b618d9f6beed7fe2ccc 100644
--- a/src/hardware/hw_main.c
+++ b/src/hardware/hw_main.c
@@ -5419,7 +5419,7 @@ void HWR_StartScreenWipe(void)
 
 void HWR_EndScreenWipe(void)
 {
-	HWRWipeCounter = 1.0f;
+	HWRWipeCounter = 0.0f;
 	//CONS_Debug(DBG_RENDER, "In HWR_EndScreenWipe()\n");
 	HWD.pfnEndScreenWipe();
 }
@@ -5429,17 +5429,38 @@ void HWR_DrawIntermissionBG(void)
 	HWD.pfnDrawIntermissionBG();
 }
 
-void HWR_DoScreenWipe(void)
+// Draws one frame of an OpenGL wipe, using lump FADEmmss (mm = wipenum, ss = scrnnum) as the fade mask.
+void HWR_DoWipe(UINT8 wipenum, UINT8 scrnnum)
 {
-	//CONS_Debug(DBG_RENDER, "In HWR_DoScreenWipe(). Alpha =%f\n", HWRWipeCounter);
+	static char lumpname[9] = "FADEmmss"; // name template; the "mmss" part is overwritten below
+	lumpnum_t lumpnum;
+	size_t lsize;
+
+	if (wipenum > 99 || scrnnum > 99) // not a valid wipe number
+		return; // shouldn't end up here really, the loop should've stopped running beforehand
+
+	// puts the numbers into the lumpname
+	sprintf(&lumpname[4], "%.2hu%.2hu", (UINT16)wipenum, (UINT16)scrnnum);
+	lumpnum = W_CheckNumForName(lumpname);
+	if (lumpnum == LUMPERROR) // again, shouldn't be here really
+		return;
+
+	lsize = W_LumpLength(lumpnum);
+
+	if (!(lsize == 256000 || lsize == 64000 || lsize == 16000 || lsize == 4000))
+	{
+		CONS_Alert(CONS_WARNING, "Fade mask lump %s of incorrect size, ignored\n", lumpname);
+		return; // again, shouldn't get here if it is a bad size
+	}
+
+	HWR_GetFadeMask(lumpnum); // caches the mask if needed and sets it as the current texture
 
-	HWD.pfnDoScreenWipe(HWRWipeCounter);
+	HWD.pfnDoScreenWipe(HWRWipeCounter); // Still send in wipecounter since old stuff might not support multitexturing
 
-	// This works for all the cases in vanilla until fade masks get done
-	HWRWipeCounter -= 0.05f; // Go less opaque after
+	HWRWipeCounter += 0.05f; // increase opacity of end screen
 
-	if (HWRWipeCounter < 0)
-		HWRWipeCounter = 0;
+	if (HWRWipeCounter > 1.0f) // clamp so the alpha sent to pfnDoScreenWipe never exceeds 1.0
+		HWRWipeCounter = 1.0f;
 }
 
 #endif // HWRENDER
diff --git a/src/hardware/hw_main.h b/src/hardware/hw_main.h
index 8d8b69e60ee5e87aa7dea9f1015e1df947503401..969946442d1e67ca55db6fde5e238ab5411459db 100644
--- a/src/hardware/hw_main.h
+++ b/src/hardware/hw_main.h
@@ -63,8 +63,8 @@ INT32 HWR_GetTextureUsed(void);
 void HWR_DoPostProcessor(player_t *player);
 void HWR_StartScreenWipe(void);
 void HWR_EndScreenWipe(void);
-void HWR_DoScreenWipe(void);
 void HWR_DrawIntermissionBG(void);
+void HWR_DoWipe(UINT8 wipenum, UINT8 scrnnum);
 
 // This stuff is put here so MD2's can use them
 UINT32 HWR_Lighting(INT32 light, UINT32 color, UINT32 fadecolor, boolean fogblockpoly, boolean plane);
diff --git a/src/hardware/r_opengl/ogl_win.c b/src/hardware/r_opengl/ogl_win.c
index daf37fe2912ea69e003c627d288e96c73de71b8f..bfdc96d6a5322e4e41ca91e6d2f3d785238decf3 100644
--- a/src/hardware/r_opengl/ogl_win.c
+++ b/src/hardware/r_opengl/ogl_win.c
@@ -366,6 +366,10 @@ static INT32 WINAPI SetRes(viddef_t *lvid, vmode_t *pcurrentmode)
 	else
 		maximumAnisotropy = 0;
 
+#ifndef MINI_GL_COMPATIBILITY
+	SetupGLFunc13();
+#endif
+
 
 	screen_depth = (GLbyte)(lvid->bpp*8);
 	if (screen_depth > 16)
diff --git a/src/hardware/r_opengl/r_opengl.c b/src/hardware/r_opengl/r_opengl.c
index b853f084e00166dd6f91a37bef2bbdabe8498baa..7590f49f4f28995bd2a00fc2d27282d25105fd99 100644
--- a/src/hardware/r_opengl/r_opengl.c
+++ b/src/hardware/r_opengl/r_opengl.c
@@ -157,6 +157,10 @@ float byteasfloat(UINT8 fbyte)
 
 static I_Error_t I_Error_GL = NULL;
 
+#ifndef MINI_GL_COMPATIBILITY
+static boolean gl13 = false; // whether we can use opengl 1.3 functions
+#endif
+
 
 // -----------------+
 // DBG_Printf       : Output error messages to debug log if DEBUG_TO_FILE is defined,
@@ -263,6 +267,11 @@ FUNCPRINTF void DBG_Printf(const char *lpFmt, ...)
 /* GLU functions */
 #define pgluBuild2DMipmaps gluBuild2DMipmaps
 #endif
+#ifndef MINI_GL_COMPATIBILITY
+/* 1.3 functions for multitexturing (static build: plain aliases, like the other pgl* defines here) */
+#define pglActiveTexture glActiveTexture
+#define pglMultiTexCoord2f glMultiTexCoord2f
+#endif
 #else //!STATIC_OPENGL
 
 /* 1.0 functions */
@@ -387,6 +396,14 @@ static PFNglCopyTexImage2D pglCopyTexImage2D;
 /* GLU functions */
 typedef GLint (APIENTRY * PFNgluBuild2DMipmaps) (GLenum target, GLint internalFormat, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *data);
 static PFNgluBuild2DMipmaps pgluBuild2DMipmaps;
+
+#ifndef MINI_GL_COMPATIBILITY
+/* 1.3 functions for multitexturing; typedefs use this file's PFNgl* naming so they can't clash with glext.h's PFNGL...PROC names */
+typedef void (APIENTRY *PFNglActiveTexture) (GLenum);
+static PFNglActiveTexture pglActiveTexture;
+typedef void (APIENTRY *PFNglMultiTexCoord2f) (GLenum, GLfloat, GLfloat);
+static PFNglMultiTexCoord2f pglMultiTexCoord2f;
+#endif
 #endif
 
 #ifndef MINI_GL_COMPATIBILITY
@@ -402,6 +419,14 @@ static PFNgluBuild2DMipmaps pgluBuild2DMipmaps;
 #define GL_TEXTURE_MAX_LOD 0x813B
 #endif
 
+/* 1.3 GL_TEXTUREi */
+#ifndef GL_TEXTURE0
+#define GL_TEXTURE0 0x84C0
+#endif
+#ifndef GL_TEXTURE1
+#define GL_TEXTURE1 0x84C1
+#endif
+
 #endif
 
 #ifdef MINI_GL_COMPATIBILITY
@@ -492,6 +517,39 @@ boolean SetupGLfunc(void)
 	return true;
 }
 
+#ifndef MINI_GL_COMPATIBILITY
+// Looks up the OpenGL 1.3 multitexturing functions and enables gl13 only if both were found. Always returns true.
+// This has to be done after the context is created so the version number can be obtained
+boolean SetupGLFunc13(void)
+{
+#ifndef STATIC_OPENGL
+#define GETOPENGLFUNC(func, proc) \
+	func = GetGLFunc(#proc); \
+	if (!func) \
+	{ \
+		DBG_Printf("failed to get OpenGL function: %s", #proc); \
+	} \
+
+	const char *glversion = (const char *)pglGetString(GL_VERSION);
+	UINT32 majorversion = 0, minorversion = 0;
+
+	if (glversion != NULL && sscanf((char *)glversion, "%u.%u", &majorversion, &minorversion) == 2) // There is a version number I can identify
+	{
+		if (majorversion > 1 || (majorversion == 1 && minorversion >= 3)) // Version of OpenGL is equal to or greater than 1.3
+		{
+			// Get the functions
+			GETOPENGLFUNC(pglActiveTexture , glActiveTexture)
+			GETOPENGLFUNC(pglMultiTexCoord2f , glMultiTexCoord2f)
+			if (pglActiveTexture && pglMultiTexCoord2f) // a failed lookup leaves a NULL pointer that DoScreenWipe must never call
+				gl13 = true; // the new fade mask stuff can be done; in every other case it still uses the old fade stuff.
+		}
+	}
+#undef GETOPENGLFUNC
+#endif
+	return true;
+}
+#endif
+
 // -----------------+
 // SetNoTexture     : Disable texture
 // -----------------+
@@ -1234,6 +1292,23 @@ EXPORT void HWRAPI(SetTexture) (FTextureInfo *pTexInfo)
 				}
 			}
 		}
+		else if (pTexInfo->grInfo.format == GR_TEXFMT_ALPHA_8) // Used for fade masks
+		{
+			const GLubyte *pImgData = (const GLubyte *)pTexInfo->grInfo.data;
+			INT32 i, j;
+
+			for (j = 0; j < h; j++)
+			{
+				for (i = 0; i < w; i++)
+				{
+					tex[w*j+i]  = (*pImgData>>4)<<12; // top nibble comes from the mask byte itself, not from the pointer
+					tex[w*j+i] |= (255>>4)<<8; // the other three nibbles are set to their maximum (15)
+					tex[w*j+i] |= (255>>4)<<4;
+					tex[w*j+i] |= (255>>4);
+					pImgData++;
+				}
+			}
+		}
 		else
 			DBG_Printf ("SetTexture(bad format) %ld\n", pTexInfo->grInfo.format);
 #else
@@ -1299,6 +1374,23 @@ EXPORT void HWRAPI(SetTexture) (FTextureInfo *pTexInfo)
 				}
 			}
 		}
+		else if (pTexInfo->grInfo.format == GR_TEXFMT_ALPHA_8) // Used for fade masks
+		{
+			const GLubyte *pImgData = (const GLubyte *)pTexInfo->grInfo.data;
+			INT32 i, j;
+
+			for (j = 0; j < h; j++)
+			{
+				for (i = 0; i < w; i++)
+				{
+					tex[w*j+i].s.red   = 255; // 255 because the fade mask is modulated with the screen texture, so alpha affects it while the colours don't
+					tex[w*j+i].s.green = 255;
+					tex[w*j+i].s.blue  = 255;
+					tex[w*j+i].s.alpha = *pImgData;
+					pImgData++;
+				}
+			}
+		}
 		else
 			DBG_Printf ("SetTexture(bad format) %ld\n", pTexInfo->grInfo.format);
 #endif
@@ -2109,6 +2201,10 @@ EXPORT void HWRAPI(DoScreenWipe)(float alpha)
 	INT32 texsize = 2048;
 	float xfix, yfix;
 
+#ifndef MINI_GL_COMPATIBILITY
+	INT32 fademaskdownloaded = tex_downloaded; // the fade mask that has been set
+#endif
+
 	// Use a power of two texture, dammit
 	if(screen_width <= 1024)
 		texsize = 1024;
@@ -2122,8 +2218,8 @@ EXPORT void HWRAPI(DoScreenWipe)(float alpha)
 
 	SetBlend(PF_Modulated|PF_NoDepthTest|PF_Clip|PF_NoZClip);
 
-	// Draw the screen on bottom to fade to
-	pglBindTexture(GL_TEXTURE_2D, endScreenWipe);
+	// Draw the original screen
+	pglBindTexture(GL_TEXTURE_2D, startScreenWipe);
 	pglBegin(GL_QUADS);
 		pglColor4f(1.0f, 1.0f, 1.0f, 1.0f);
 
@@ -2142,12 +2238,56 @@ EXPORT void HWRAPI(DoScreenWipe)(float alpha)
 		// Bottom right
 		pglTexCoord2f(xfix, 0.0f);
 		pglVertex3f(1.0f, -1.0f, 1.0f);
+
 	pglEnd();
 
 	SetBlend(PF_Modulated|PF_Translucent|PF_NoDepthTest|PF_Clip|PF_NoZClip);
 
-	// Draw the screen on top that fades.
-	pglBindTexture(GL_TEXTURE_2D, startScreenWipe);
+#ifndef MINI_GL_COMPATIBILITY
+	if (gl13)
+	{
+		// Draw the end screen that fades in
+		pglActiveTexture(GL_TEXTURE0);
+		pglEnable(GL_TEXTURE_2D);
+		pglBindTexture(GL_TEXTURE_2D, endScreenWipe);
+
+		pglActiveTexture(GL_TEXTURE1);
+		pglEnable(GL_TEXTURE_2D);
+		pglBindTexture(GL_TEXTURE_2D, fademaskdownloaded);
+
+		pglTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
+		pglBegin(GL_QUADS);
+			pglColor4f(1.0f, 1.0f, 1.0f, 1.0f);
+
+			// Bottom left
+			pglMultiTexCoord2f(GL_TEXTURE0, 0.0f, 0.0f);
+			pglMultiTexCoord2f(GL_TEXTURE1, 0.0f, 0.0f);
+			pglVertex3f(-1.0f, -1.0f, 1.0f);
+
+			// Top left
+			pglMultiTexCoord2f(GL_TEXTURE0, 0.0f, yfix);
+			pglMultiTexCoord2f(GL_TEXTURE1, 0.0f, 1.0f);
+			pglVertex3f(-1.0f, 1.0f, 1.0f);
+
+			// Top right
+			pglMultiTexCoord2f(GL_TEXTURE0, xfix, yfix);
+			pglMultiTexCoord2f(GL_TEXTURE1, 1.0f, 1.0f);
+			pglVertex3f(1.0f, 1.0f, 1.0f);
+
+			// Bottom right
+			pglMultiTexCoord2f(GL_TEXTURE0, xfix, 0.0f);
+			pglMultiTexCoord2f(GL_TEXTURE1, 1.0f, 0.0f);
+			pglVertex3f(1.0f, -1.0f, 1.0f);
+		pglEnd();
+
+		pglDisable(GL_TEXTURE_2D); // disable the texture in the 2nd texture unit
+		pglActiveTexture(GL_TEXTURE0);
+	}
+	else
+	{
+#endif
+	// Draw the end screen that fades in
+	pglBindTexture(GL_TEXTURE_2D, endScreenWipe);
 	pglBegin(GL_QUADS);
 		pglColor4f(1.0f, 1.0f, 1.0f, alpha);
 
@@ -2166,8 +2306,10 @@ EXPORT void HWRAPI(DoScreenWipe)(float alpha)
 		// Bottom right
 		pglTexCoord2f(xfix, 0.0f);
 		pglVertex3f(1.0f, -1.0f, 1.0f);
-
 	pglEnd();
+#ifndef MINI_GL_COMPATIBILITY
+	}
+#endif
 
 	tex_downloaded = 0; // 0 so it knows it doesn't have any of the cached patches downloaded right now
 }
diff --git a/src/hardware/r_opengl/r_opengl.h b/src/hardware/r_opengl/r_opengl.h
index fd018f4b13c1b73b92761b88ed137ef6389cff7f..f4d4f77e00bbf4f38ff2bccac7bee579af7e9001 100644
--- a/src/hardware/r_opengl/r_opengl.h
+++ b/src/hardware/r_opengl/r_opengl.h
@@ -35,6 +35,13 @@
 #else
 #include <GL/gl.h>
 #include <GL/glu.h>
+
+#ifndef MINI_GL_COMPATIBILITY
+#ifdef STATIC_OPENGL // Because of the 1.3 functions, you'll need GLext to compile it if static
+#define GL_GLEXT_PROTOTYPES
+#include <GL/glext.h>
+#endif
+#endif
 #endif
 
 #define  _CREATE_DLL_  // necessary for Unix AND Windows
@@ -66,6 +73,7 @@
 boolean LoadGL(void);
 void *GetGLFunc(const char *proc);
 boolean SetupGLfunc(void);
+boolean SetupGLFunc13(void);
 void Flush(void);
 INT32 isExtAvailable(const char *extension, const GLubyte *start);
 boolean SetupPixelFormat(INT32 WantColorBits, INT32 WantStencilBits, INT32 WantDepthBits);