diff --git a/src/f_wipe.c b/src/f_wipe.c
index 4bcfb029b2ed1815e895ff126127bb628c6709fb..1ea32d0ebe92e6fdcfda930356cb36959c7acb91 100644
--- a/src/f_wipe.c
+++ b/src/f_wipe.c
@@ -569,7 +569,7 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu)
 			if (rendermode == render_opengl)
 			{
 				// send in the wipe type and wipe frame because we need to cache the graphic
-				HWR_DoTintedWipe(wipetype, wipeframe-1);
+				HWR_DoWipe(wipetype, wipeframe-1);
 			}
 			else
 #endif
diff --git a/src/hardware/CMakeLists.txt b/src/hardware/CMakeLists.txt
index e7819aba97e2065d36f6f920d4725d7b294505f3..3b6135c1d768a732944d2a18b4e0d41f7fb8c461 100644
--- a/src/hardware/CMakeLists.txt
+++ b/src/hardware/CMakeLists.txt
@@ -10,5 +10,6 @@ target_sources(SRB2SDL2 PRIVATE
 	hw_md3load.c
 	hw_model.c
 	hw_batching.c
+	hw_shaders.c
 	r_opengl/r_opengl.c
 )
diff --git a/src/hardware/Sourcefile b/src/hardware/Sourcefile
index 6c374621d7b1de61f2b5a5c6fd9171f0685eccbf..4fa61470f26616ccb0dd311b4d08045054f23921 100644
--- a/src/hardware/Sourcefile
+++ b/src/hardware/Sourcefile
@@ -9,4 +9,5 @@ hw_md2load.c
 hw_md3load.c
 hw_model.c
 hw_batching.c
+hw_shaders.c
 r_opengl/r_opengl.c
diff --git a/src/hardware/hw_batching.c b/src/hardware/hw_batching.c
index a640a9917ad169cbeb8a141e6741614081d251df..b9ab2592d3f00a4fcdb4c1af31e2eaae484cd584 100644
--- a/src/hardware/hw_batching.c
+++ b/src/hardware/hw_batching.c
@@ -76,7 +76,7 @@ void HWR_SetCurrentTexture(GLMipmap_t *texture)
 // If batching is enabled, this function collects the polygon data and the chosen texture
 // for later use in HWR_RenderBatches. Otherwise the rendering backend is used to
 // render the polygon immediately.
-void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, int shader, boolean horizonSpecial)
+void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, int shader_target, boolean horizonSpecial)
 {
     if (currently_batching)
 	{
@@ -114,7 +114,7 @@ void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPt
 		polygonArray[polygonArraySize].numVerts = iNumPts;
 		polygonArray[polygonArraySize].polyFlags = PolyFlags;
 		polygonArray[polygonArraySize].texture = current_texture;
-		polygonArray[polygonArraySize].shader = shader;
+		polygonArray[polygonArraySize].shader = (shader_target != -1) ? HWR_GetShaderFromTarget(shader_target) : shader_target;
 		polygonArray[polygonArraySize].horizonSpecial = horizonSpecial;
 		// default to polygonArraySize so we don't lose order on horizon lines
 		// (yes, it's supposed to be negative, since we're sorting in that direction)
@@ -134,7 +134,7 @@ void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPt
 			DIGEST(hash, pSurf->PolyColor.rgba);
 			if (cv_glshaders.value && gl_shadersavailable)
 			{
-				DIGEST(hash, shader);
+				DIGEST(hash, shader_target);
 				DIGEST(hash, pSurf->TintColor.rgba);
 				DIGEST(hash, pSurf->FadeColor.rgba);
 				DIGEST(hash, pSurf->LightInfo.light_level);
@@ -151,10 +151,9 @@ void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPt
 	}
 	else
 	{
-        if (shader)
-            HWD.pfnSetShader(shader);
-        HWD.pfnDrawPolygon(pSurf, pOutVerts, iNumPts, PolyFlags);
-    }
+		HWD.pfnSetShader((shader_target != SHADER_NONE) ? HWR_GetShaderFromTarget(shader_target) : shader_target);
+		HWD.pfnDrawPolygon(pSurf, pOutVerts, iNumPts, PolyFlags);
+	}
 }
 
 static int comparePolygons(const void *p1, const void *p2)
diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c
index 55a32114a87203b5b5bd4ef451f0b8bd3e08c6fa..f1f0668be51d99dd3e5e07bcb4760083afb76c33 100644
--- a/src/hardware/hw_cache.c
+++ b/src/hardware/hw_cache.c
@@ -32,6 +32,14 @@
 INT32 patchformat = GL_TEXFMT_AP_88; // use alpha for holes
 INT32 textureformat = GL_TEXFMT_P_8; // use chromakey for hole
 
+RGBA_t mapPalette[256] = {0}; // the palette for the currently loaded level or menu etc.
+
+// Returns the palette to use when caching textures: mapPalette when HWR_ShouldUsePaletteRendering() is true, otherwise pLocalPalette.
+RGBA_t *HWR_GetTexturePalette(void)
+{
+	return HWR_ShouldUsePaletteRendering() ? mapPalette : pLocalPalette;
+}
+
 static INT32 format2bpp(GLTextureFormat_t format)
 {
 	if (format == GL_TEXFMT_RGBA)
@@ -49,7 +57,7 @@ static void HWR_DrawColumnInCache(const column_t *patchcol, UINT8 *block, GLMipm
 								INT32 pblockheight, INT32 blockmodulo,
 								fixed_t yfracstep, fixed_t scale_y,
 								texpatch_t *originPatch, INT32 patchheight,
-								INT32 bpp)
+								INT32 bpp, RGBA_t *palette)
 {
 	fixed_t yfrac, position, count;
 	UINT8 *dest;
@@ -113,7 +121,7 @@ static void HWR_DrawColumnInCache(const column_t *patchcol, UINT8 *block, GLMipm
 					memcpy(dest, &texelu16, sizeof(UINT16));
 					break;
 				case 3:
-					colortemp = V_GetColor(texel);
+					colortemp = palette[texel];
 					if ((originPatch != NULL) && (originPatch->style != AST_COPY))
 					{
 						RGBA_t rgbatexel;
@@ -123,7 +131,7 @@ static void HWR_DrawColumnInCache(const column_t *patchcol, UINT8 *block, GLMipm
 					memcpy(dest, &colortemp, sizeof(RGBA_t)-sizeof(UINT8));
 					break;
 				case 4:
-					colortemp = V_GetColor(texel);
+					colortemp = palette[texel];
 					colortemp.s.alpha = alpha;
 					if ((originPatch != NULL) && (originPatch->style != AST_COPY))
 					{
@@ -152,7 +160,7 @@ static void HWR_DrawFlippedColumnInCache(const column_t *patchcol, UINT8 *block,
 								INT32 pblockheight, INT32 blockmodulo,
 								fixed_t yfracstep, fixed_t scale_y,
 								texpatch_t *originPatch, INT32 patchheight,
-								INT32 bpp)
+								INT32 bpp, RGBA_t *palette)
 {
 	fixed_t yfrac, position, count;
 	UINT8 *dest;
@@ -217,7 +225,7 @@ static void HWR_DrawFlippedColumnInCache(const column_t *patchcol, UINT8 *block,
 					memcpy(dest, &texelu16, sizeof(UINT16));
 					break;
 				case 3:
-					colortemp = V_GetColor(texel);
+					colortemp = palette[texel];
 					if ((originPatch != NULL) && (originPatch->style != AST_COPY))
 					{
 						RGBA_t rgbatexel;
@@ -227,7 +235,7 @@ static void HWR_DrawFlippedColumnInCache(const column_t *patchcol, UINT8 *block,
 					memcpy(dest, &colortemp, sizeof(RGBA_t)-sizeof(UINT8));
 					break;
 				case 4:
-					colortemp = V_GetColor(texel);
+					colortemp = palette[texel];
 					colortemp.s.alpha = alpha;
 					if ((originPatch != NULL) && (originPatch->style != AST_COPY))
 					{
@@ -269,10 +277,13 @@ static void HWR_DrawPatchInCache(GLMipmap_t *mipmap,
 	UINT8 *block = mipmap->data;
 	INT32 bpp;
 	INT32 blockmodulo;
+	RGBA_t *palette;
 
 	if (pwidth <= 0 || pheight <= 0)
 		return;
 
+	palette = HWR_GetTexturePalette();
+
 	ncols = pwidth;
 
 	// source advance
@@ -298,7 +309,7 @@ static void HWR_DrawPatchInCache(GLMipmap_t *mipmap,
 								pblockheight, blockmodulo,
 								yfracstep, scale_y,
 								NULL, pheight, // not that pheight is going to get used anyway...
-								bpp);
+								bpp, palette);
 	}
 }
 
@@ -317,16 +328,19 @@ static void HWR_DrawTexturePatchInCache(GLMipmap_t *mipmap,
 	INT32 bpp;
 	INT32 blockmodulo;
 	INT32 width, height;
+	RGBA_t *palette;
 	// Column drawing function pointer.
 	static void (*ColumnDrawerPointer)(const column_t *patchcol, UINT8 *block, GLMipmap_t *mipmap,
 								INT32 pblockheight, INT32 blockmodulo,
 								fixed_t yfracstep, fixed_t scale_y,
 								texpatch_t *originPatch, INT32 patchheight,
-								INT32 bpp);
+								INT32 bpp, RGBA_t *palette);
 
 	if (texture->width <= 0 || texture->height <= 0)
 		return;
 
+	palette = HWR_GetTexturePalette();
+
 	ColumnDrawerPointer = (patch->flip & 2) ? HWR_DrawFlippedColumnInCache : HWR_DrawColumnInCache;
 
 	x1 = patch->originx;
@@ -386,7 +400,7 @@ static void HWR_DrawTexturePatchInCache(GLMipmap_t *mipmap,
 								pblockheight, blockmodulo,
 								yfracstep, scale_y,
 								patch, height,
-								bpp);
+								bpp, palette);
 	}
 }
 
@@ -429,6 +443,9 @@ static void HWR_GenerateTexture(INT32 texnum, GLMapTexture_t *grtex)
 	INT32 i;
 	boolean skyspecial = false; //poor hack for Legacy large skies..
 
+	RGBA_t *palette;
+	palette = HWR_GetTexturePalette();
+
 	texture = textures[texnum];
 
 	// hack the Legacy skies..
@@ -447,7 +464,10 @@ static void HWR_GenerateTexture(INT32 texnum, GLMapTexture_t *grtex)
 
 	grtex->mipmap.width = (UINT16)texture->width;
 	grtex->mipmap.height = (UINT16)texture->height;
-	grtex->mipmap.format = textureformat;
+	if (skyspecial)
+		grtex->mipmap.format = GL_TEXFMT_RGBA; // that skyspecial code below assumes this format ...
+	else
+		grtex->mipmap.format = textureformat;
 
 	blockwidth = texture->width;
 	blockheight = texture->height;
@@ -459,7 +479,7 @@ static void HWR_GenerateTexture(INT32 texnum, GLMapTexture_t *grtex)
 		INT32 j;
 		RGBA_t col;
 
-		col = V_GetColor(HWR_PATCHES_CHROMAKEY_COLORINDEX);
+		col = palette[HWR_PATCHES_CHROMAKEY_COLORINDEX];
 		for (j = 0; j < blockheight; j++)
 		{
 			for (i = 0; i < blockwidth; i++)
@@ -739,19 +759,6 @@ void HWR_LoadMapTextures(size_t pnumtextures)
 	gl_maptexturesloaded = true;
 }
 
-void HWR_SetPalette(RGBA_t *palette)
-{
-	HWD.pfnSetPalette(palette);
-
-	// hardware driver will flush there own cache if cache is non paletized
-	// now flush data texture cache so 32 bit texture are recomputed
-	if (patchformat == GL_TEXFMT_RGBA || textureformat == GL_TEXFMT_RGBA)
-	{
-		Z_FreeTag(PU_HWRCACHE);
-		Z_FreeTag(PU_HWRCACHE_UNLOCKED);
-	}
-}
-
 // --------------------------------------------------------------------------
 // Make sure texture is downloaded and set it as the source
 // --------------------------------------------------------------------------
@@ -965,6 +972,139 @@ void HWR_UnlockCachedPatch(GLPatch_t *gpatch)
 	Z_ChangeTag(gpatch->mipmap->data, PU_HWRCACHE_UNLOCKED);
 }
 
+static const INT32 picmode2GR[] = // GL texture format for each pic mode; indexed with pic->mode
+{
+	GL_TEXFMT_P_8,                // PALETTE
+	0,                            // INTENSITY          (not supported yet)
+	GL_TEXFMT_ALPHA_INTENSITY_88, // INTENSITY_ALPHA    (coronas use this)
+	0,                            // RGB24              (not supported yet)
+	GL_TEXFMT_RGBA,               // RGBA32             (opengl only)
+};
+
+static void HWR_DrawPicInCache(UINT8 *block, INT32 pblockwidth, INT32 pblockheight, // resamples 'pic' into 'block' (pblockwidth x pblockheight texels)
+	INT32 blockmodulo, pic_t *pic, INT32 bpp) // blockmodulo: bytes per destination row; bpp: bytes per destination texel
+{
+	INT32 i,j;
+	fixed_t posx, posy, stepx, stepy;
+	UINT8 *dest, *src, texel;
+	UINT16 texelu16;
+	INT32 picbpp;
+	RGBA_t col;
+	RGBA_t *palette = HWR_GetTexturePalette();
+
+	stepy = ((INT32)SHORT(pic->height)<<FRACBITS)/pblockheight; // fixed-point source rows per destination row
+	stepx = ((INT32)SHORT(pic->width)<<FRACBITS)/pblockwidth; // fixed-point source columns per destination column
+	picbpp = format2bpp(picmode2GR[pic->mode]); // bytes per source texel
+	posy = 0;
+	for (j = 0; j < pblockheight; j++)
+	{
+		posx = 0;
+		dest = &block[j*blockmodulo];
+		src = &pic->data[(posy>>FRACBITS)*SHORT(pic->width)*picbpp];
+		for (i = 0; i < pblockwidth;i++)
+		{
+			switch (pic->mode)
+			{ // source bpp
+				case PALETTE :
+					texel = src[(posx+FRACUNIT/2)>>FRACBITS];
+					switch (bpp)
+					{ // destination bpp
+						case 1 :
+							*dest++ = texel; break;
+						case 2 :
+							texelu16 = (UINT16)(texel | 0xff00);
+							memcpy(dest, &texelu16, sizeof(UINT16));
+							dest += sizeof(UINT16);
+							break;
+						case 3 :
+							col = palette[texel];
+							memcpy(dest, &col, sizeof(RGBA_t)-sizeof(UINT8));
+							dest += sizeof(RGBA_t)-sizeof(UINT8);
+							break;
+						case 4 :
+							memcpy(dest, &palette[texel], sizeof(RGBA_t));
+							dest += sizeof(RGBA_t);
+							break;
+					}
+					break;
+				case INTENSITY :
+					*dest++ = src[(posx+FRACUNIT/2)>>FRACBITS];
+					break;
+				case INTENSITY_ALPHA : // assume dest bpp = 2
+					memcpy(dest, src + ((posx+FRACUNIT/2)>>FRACBITS)*sizeof(UINT16), sizeof(UINT16));
+					dest += sizeof(UINT16);
+					break;
+				case RGB24 :
+					break;  // not supported yet
+				case RGBA32 : // assume dest bpp = 4
+					memcpy(dest, src + ((posx+FRACUNIT/2)>>FRACBITS)*sizeof(UINT32), sizeof(UINT32));
+					dest += sizeof(UINT32); // advance only after the copy, so texel i lands in slot i and the last write stays inside the row
+					break;
+			}
+			posx += stepx;
+		}
+		posy += stepy;
+	}
+}
+
+// -----------------+
+// HWR_GetPic       : Builds the texture data for pic lump 'lumpnum' if none exists yet, then passes its mipmap to HWD.pfnSetTexture
+// Returns          : the patch_t obtained from HWR_GetCachedGLPatch(lumpnum)
+// -----------------+
+patch_t *HWR_GetPic(lumpnum_t lumpnum)
+{
+	patch_t *patch = HWR_GetCachedGLPatch(lumpnum);
+	GLPatch_t *grPatch = (GLPatch_t *)(patch->hardware);
+
+	if (!grPatch->mipmap->downloaded && !grPatch->mipmap->data) // nothing downloaded and no cached data: (re)build the block
+	{
+		pic_t *pic;
+		UINT8 *block;
+		size_t len;
+
+		pic = W_CacheLumpNum(lumpnum, PU_CACHE);
+		patch->width = SHORT(pic->width);
+		patch->height = SHORT(pic->height);
+		len = W_LumpLength(lumpnum) - sizeof (pic_t); // lump bytes following the pic_t header
+
+		grPatch->mipmap->width = (UINT16)patch->width;
+		grPatch->mipmap->height = (UINT16)patch->height;
+
+		if (pic->mode == PALETTE)
+			grPatch->mipmap->format = textureformat; // can be set by driver
+		else
+			grPatch->mipmap->format = picmode2GR[pic->mode]; // NOTE(review): pic->mode is not range-checked before indexing picmode2GR
+
+		Z_Free(grPatch->mipmap->data);
+
+		// allocate block
+		block = MakeBlock(grPatch->mipmap);
+
+		if (patch->width  == SHORT(pic->width) &&
+			patch->height == SHORT(pic->height) &&
+			format2bpp(grPatch->mipmap->format) == format2bpp(picmode2GR[pic->mode]))
+		{
+			// no conversion needed; NOTE(review): the size checks always hold (assigned from pic above), and 'len' comes from the lump, not the block size - confirm it fits
+			M_Memcpy(grPatch->mipmap->data, pic->data,len);
+		}
+		else
+			HWR_DrawPicInCache(block, SHORT(pic->width), SHORT(pic->height),
+			                   SHORT(pic->width)*format2bpp(grPatch->mipmap->format),
+			                   pic,
+			                   format2bpp(grPatch->mipmap->format));
+
+		Z_Unlock(pic);
+		Z_ChangeTag(block, PU_HWRCACHE_UNLOCKED);
+
+		grPatch->mipmap->flags = 0;
+		grPatch->max_s = grPatch->max_t = 1.0f;
+	}
+	HWD.pfnSetTexture(grPatch->mipmap);
+	//CONS_Debug(DBG_RENDER, "picloaded at %x as texture %d\n",grPatch->mipmap->data, grPatch->mipmap->downloaded);
+
+	return patch;
+}
+
 patch_t *HWR_GetCachedGLPatchPwad(UINT16 wadnum, UINT16 lumpnum)
 {
 	lumpcache_t *lumpcache = wadfiles[wadnum]->patchcache;
@@ -992,6 +1132,7 @@ static void HWR_DrawFadeMaskInCache(GLMipmap_t *mipmap, INT32 pblockwidth, INT32
 	UINT8 *flat;
 	UINT8 *dest, *src, texel;
 	RGBA_t col;
+	RGBA_t *palette = HWR_GetTexturePalette();
 
 	// Place the flats data into flat
 	W_ReadLump(fademasklumpnum, Z_Malloc(W_LumpLength(fademasklumpnum),
@@ -1009,7 +1150,7 @@ static void HWR_DrawFadeMaskInCache(GLMipmap_t *mipmap, INT32 pblockwidth, INT32
 		{
 			// fademask bpp is always 1, and is used just for alpha
 			texel = src[(posx)>>FRACBITS];
-			col = V_GetColor(texel);
+			col = palette[texel];
 			*dest = col.s.red; // take the red level of the colour and use it for alpha, as fademasks do
 
 			dest++;
@@ -1081,4 +1222,185 @@ void HWR_GetFadeMask(lumpnum_t fademasklumpnum)
 	Z_ChangeTag(grmip->data, PU_HWRCACHE_UNLOCKED);
 }
 
+// =================================================
+//             PALETTE HANDLING
+// =================================================
+
+void HWR_SetPalette(RGBA_t *palette) // applies 'palette' as the screen palette (palette rendering) or as the texture palette (otherwise)
+{
+	if (HWR_ShouldUsePaletteRendering())
+	{
+		// set the palette for palette postprocessing
+
+		if (cv_glpalettedepth.value == 16)
+		{
+			// crush to 16-bit rgb565, like software currently does in the standard configuration
+			// Note: Software's screenshots have the 24-bit palette, but the screen gets
+			// the 16-bit version! For making comparison screenshots either use an external screenshot
+			// tool or set the palette depth to 24 bits.
+			RGBA_t crushed_palette[256];
+			int i;
+			for (i = 0; i < 256; i++)
+			{
+				float fred = (float)(palette[i].s.red >> 3); // keep the top 5 bits
+				float fgreen = (float)(palette[i].s.green >> 2); // keep the top 6 bits
+				float fblue = (float)(palette[i].s.blue >> 3); // keep the top 5 bits
+				crushed_palette[i].s.red = (UINT8)(fred / 31.0f * 255.0f); // rescale 0..31 back to 0..255
+				crushed_palette[i].s.green = (UINT8)(fgreen / 63.0f * 255.0f); // rescale 0..63 back to 0..255
+				crushed_palette[i].s.blue = (UINT8)(fblue / 31.0f * 255.0f); // rescale 0..31 back to 0..255
+				crushed_palette[i].s.alpha = 255;
+			}
+			HWD.pfnSetScreenPalette(crushed_palette);
+		}
+		else
+		{
+			HWD.pfnSetScreenPalette(palette);
+		}
+
+		// this part is responsible for keeping track of the palette OUTSIDE of a level.
+		if (!(gamestate == GS_LEVEL || (gamestate == GS_TITLESCREEN && titlemapinaction)))
+			HWR_SetMapPalette();
+	}
+	else
+	{
+		// set the palette for the textures
+		HWD.pfnSetTexturePalette(palette);
+		// reset mapPalette so next call to HWR_SetMapPalette will update everything correctly
+		memset(mapPalette, 0, sizeof(mapPalette));
+		// the hardware driver will flush its own cache if the cache is not palettized
+		// now flush the texture data cache so 32-bit textures are recomputed
+		if (patchformat == GL_TEXFMT_RGBA || textureformat == GL_TEXFMT_RGBA)
+		{
+			Z_FreeTag(PU_HWRCACHE);
+			Z_FreeTag(PU_HWRCACHE_UNLOCKED);
+		}
+	}
+}
+
+static void HWR_SetPaletteLookup(RGBA_t *palette) // fills an HWR_PALETTE_LUT_SIZE^3 table with NearestPaletteColor() results for 'palette' and hands it to the backend
+{
+	int r, g, b;
+	UINT8 *lut = Z_Malloc(
+		HWR_PALETTE_LUT_SIZE*HWR_PALETTE_LUT_SIZE*HWR_PALETTE_LUT_SIZE*sizeof(UINT8),
+		PU_STATIC, NULL);
+#define STEP_SIZE (256/HWR_PALETTE_LUT_SIZE) // spacing between sampled values on each colour axis
+	for (b = 0; b < HWR_PALETTE_LUT_SIZE; b++)
+	{
+		for (g = 0; g < HWR_PALETTE_LUT_SIZE; g++)
+		{
+			for (r = 0; r < HWR_PALETTE_LUT_SIZE; r++)
+			{
+				lut[b*HWR_PALETTE_LUT_SIZE*HWR_PALETTE_LUT_SIZE+g*HWR_PALETTE_LUT_SIZE+r] = // layout: blue slowest, then green, red fastest
+					NearestPaletteColor(r*STEP_SIZE, g*STEP_SIZE, b*STEP_SIZE, palette);
+			}
+		}
+	}
+#undef STEP_SIZE
+	HWD.pfnSetPaletteLookup(lut);
+	Z_Free(lut); // presumably the backend has copied/uploaded the table by now - TODO confirm
+}
+
+// Updates mapPalette to reflect the loaded level or other game state.
+// If the palette changed, the lookup table and texture palette are refreshed and RGBA texture caches are flushed.
+// Call this function only in palette rendering mode.
+void HWR_SetMapPalette(void)
+{
+	RGBA_t RGBA_converted[256];
+	RGBA_t *palette;
+	int i;
+
+	if (!(gamestate == GS_LEVEL || (gamestate == GS_TITLESCREEN && titlemapinaction)))
+	{
+		// outside of a level, pMasterPalette should have PLAYPAL ready for us
+		palette = pMasterPalette;
+	}
+	else
+	{
+		// in a level pMasterPalette might have a flash palette, but we
+		// want the map's original palette.
+		lumpnum_t lumpnum = W_GetNumForName(GetPalette());
+		size_t palsize = W_LumpLength(lumpnum);
+		UINT8 *RGB_data;
+		if (palsize < 768) // 256 entries * 3 bytes (red, green, blue)
+			I_Error("HWR_SetMapPalette: A programmer assumed palette lumps are at least 768 bytes long, but apparently this was a wrong assumption!\n");
+		RGB_data = W_CacheLumpNum(lumpnum, PU_CACHE);
+		// we got the RGB palette now, but we need it in RGBA format.
+		for (i = 0; i < 256; i++)
+		{
+			RGBA_converted[i].s.red = *(RGB_data++);
+			RGBA_converted[i].s.green = *(RGB_data++);
+			RGBA_converted[i].s.blue = *(RGB_data++);
+			RGBA_converted[i].s.alpha = 255; // the lump has no alpha channel, so every entry is made opaque
+		}
+		palette = RGBA_converted;
+	}
+
+	// check if the palette has changed from the previous one
+	if (memcmp(mapPalette, palette, sizeof(mapPalette)))
+	{
+		memcpy(mapPalette, palette, sizeof(mapPalette));
+		// in palette rendering mode, this means that all rgba textures now have wrong colors
+		// and the lookup table is outdated
+		HWR_SetPaletteLookup(mapPalette);
+		HWD.pfnSetTexturePalette(mapPalette);
+		if (patchformat == GL_TEXFMT_RGBA || textureformat == GL_TEXFMT_RGBA)
+		{
+			Z_FreeTag(PU_HWRCACHE);
+			Z_FreeTag(PU_HWRCACHE_UNLOCKED);
+		}
+	}
+}
+
+// Creates a hardware lighttable from the supplied lighttable (256 * 32 palette indices are read from it).
+// Returns the id of the hw lighttable, usable in FSurfaceInfo.
+UINT32 HWR_CreateLightTable(UINT8 *lighttable)
+{
+	UINT32 i, id;
+	RGBA_t *palette = HWR_GetTexturePalette();
+	RGBA_t *hw_lighttable = Z_Malloc(256 * 32 * sizeof(RGBA_t), PU_STATIC, NULL);
+
+	// To make the palette index -> RGBA mapping easier for the shader,
+	// the hardware lighttable is composed of RGBA colors instead of palette indices.
+	for (i = 0; i < 256 * 32; i++)
+		hw_lighttable[i] = palette[lighttable[i]];
+
+	id = HWD.pfnCreateLightTable(hw_lighttable);
+	Z_Free(hw_lighttable); // the temporary RGBA table is released as soon as the backend call returns
+	return id;
+}
+
+// Get the hw lighttable id for a colormap, creating it if it doesn't already exist. A NULL colormap selects the default colormap.
+UINT32 HWR_GetLightTableID(extracolormap_t *colormap)
+{
+	boolean default_colormap = false;
+	if (!colormap)
+	{
+		colormap = R_GetDefaultColormap(); // a place to store the hw lighttable id
+		// alternatively could just store the id in a global variable if there are issues
+		default_colormap = true;
+	}
+
+	// create hw lighttable if there isn't one (an id of 0 is treated as "not created yet")
+	if (!colormap->gl_lighttable_id)
+	{
+		UINT8 *colormap_pointer;
+
+		if (default_colormap)
+			colormap_pointer = colormaps; // don't actually use the data from the "default colormap"
+		else
+			colormap_pointer = colormap->colormap;
+		colormap->gl_lighttable_id = HWR_CreateLightTable(colormap_pointer); // cache the id on the colormap for later calls
+	}
+
+	return colormap->gl_lighttable_id;
+}
+
+// Clears the backend's hardware lighttables. Note: all hardware lighttable ids assigned before this
+// call become invalid and must not be used.
+void HWR_ClearLightTables(void)
+{
+	if (vid.glstate == VID_GL_LIBRARY_LOADED) // the backend is only called in this GL state
+		HWD.pfnClearLightTables();
+}
+
 #endif //HWRENDER
diff --git a/src/hardware/hw_defs.h b/src/hardware/hw_defs.h
index 3b660cc70c36515dab60b4ae3eabb914b5de7d5d..2d55eef2d8fd72271fef9c529d4be862318f93da 100644
--- a/src/hardware/hw_defs.h
+++ b/src/hardware/hw_defs.h
@@ -18,6 +18,12 @@
 #define ZCLIP_PLANE 4.0f // Used for the actual game drawing
 #define NZCLIP_PLANE 0.9f // Seems to be only used for the HUD and screen textures
 
+// The width/height/depth of the palette lookup table used by palette rendering.
+// Changing this also requires changing the shader code!
+// Also assumed to be a power of two in some parts of the code.
+// 64 seems to work perfectly for the vanilla palette.
+#define HWR_PALETTE_LUT_SIZE 64
+
 // ==========================================================================
 //                                                               SIMPLE TYPES
 // ==========================================================================
@@ -122,33 +128,31 @@ typedef struct
 } FOutVector;
 
 #ifdef GL_SHADERS
-// Predefined shader types
+
+// Shader targets used to render specific types of geometry.
+// A shader target is resolved to an actual shader with HWR_GetShaderFromTarget.
+// The shader returned may be a base shader or a custom shader.
 enum
 {
 	SHADER_NONE = -1,
-	SHADER_DEFAULT = 0,
 
-	SHADER_FLOOR,
+	SHADER_FLOOR = 0,
 	SHADER_WALL,
 	SHADER_SPRITE,
-	SHADER_MODEL, SHADER_MODEL_LIGHTING,
+	SHADER_MODEL,
 	SHADER_WATER,
 	SHADER_FOG,
 	SHADER_SKY,
+	SHADER_PALETTE_POSTPROCESS,
+	SHADER_UI_COLORMAP_FADE,
+	SHADER_UI_TINTED_WIPE,
 
-	NUMBASESHADERS,
+	NUMSHADERTARGETS
 };
 
 // Maximum amount of shader programs
-// Must be higher than NUMBASESHADERS
-#define HWR_MAXSHADERS 16
-
-// Shader sources (vertex and fragment)
-typedef struct
-{
-	char *vertex;
-	char *fragment;
-} shadersource_t;
+// Must be at least NUMSHADERTARGETS*2 to fit base and custom shaders for each shader target (parenthesized so it expands safely inside larger expressions).
+#define HWR_MAXSHADERS (NUMSHADERTARGETS*2)
 
 // Custom shader reference table
 typedef struct
@@ -272,11 +276,15 @@ struct FSurfaceInfo
 	RGBA_t			PolyColor;
 	RGBA_t			TintColor;
 	RGBA_t			FadeColor;
+	UINT32			LightTableId;
 	FLightInfo		LightInfo;
 };
 typedef struct FSurfaceInfo FSurfaceInfo;
 
-//Hurdler: added for backward compatibility
+#define GL_DEFAULTMIX 0x00000000
+#define GL_DEFAULTFOG 0xFF000000
+
+// Various settings and states for the rendering backend.
 enum hwdsetspecialstate
 {
 	HWD_SET_MODEL_LIGHTING = 1,
@@ -289,15 +297,13 @@ enum hwdsetspecialstate
 
 typedef enum hwdsetspecialstate hwdspecialstate_t;
 
-// Lactozilla: Shader options
-enum hwdshaderoption
+enum hwdshaderstage
 {
-	HWD_SHADEROPTION_OFF,
-	HWD_SHADEROPTION_ON,
-	HWD_SHADEROPTION_NOCUSTOM,
+	HWD_SHADERSTAGE_VERTEX,
+	HWD_SHADERSTAGE_FRAGMENT,
 };
 
-typedef enum hwdshaderoption hwdshaderoption_t;
+typedef enum hwdshaderstage hwdshaderstage_t;
 
 // Lactozilla: Shader info
 // Generally set at the start of the frame.
@@ -318,5 +324,18 @@ enum hwdfiltermode
 	HWD_SET_TEXTUREFILTER_MIXED3,
 };
 
+// Screen texture slots, passed as the 'tex' argument of the backend's screen texture functions (e.g. MakeScreenTexture)
+enum hwdscreentexture
+{
+	HWD_SCREENTEXTURE_WIPE_START, // source image for the wipe/fade effect
+	HWD_SCREENTEXTURE_WIPE_END,   // destination image for the wipe/fade effect
+	HWD_SCREENTEXTURE_GENERIC1,   // underwater/heat effect, intermission background
+	HWD_SCREENTEXTURE_GENERIC2,   // palette-based colormap fade, screen before palette rendering's postprocessing
+	HWD_SCREENTEXTURE_GENERIC3,   // screen after palette rendering's postprocessing (unused if palette rendering is disabled)
+	NUMSCREENTEXTURES,            // number of screen texture slots
+};
+
+typedef enum hwdscreentexture hwdscreentexture_t;
+
 
 #endif //_HWR_DEFS_
diff --git a/src/hardware/hw_draw.c b/src/hardware/hw_draw.c
index ba1f339d0fe9af16d7eee7ab0f73d7558de0936e..e07484137c83433edc89207a3b85ae5f921d348d 100644
--- a/src/hardware/hw_draw.c
+++ b/src/hardware/hw_draw.c
@@ -30,6 +30,7 @@
 #include "../st_stuff.h"
 #include "../p_local.h" // stplyr
 #include "../g_game.h" // players
+#include "../f_finale.h" // fade color factors
 
 #include <fcntl.h>
 #include "../i_video.h"  // for rendermode != render_glide
@@ -707,6 +708,7 @@ void HWR_FadeScreenMenuBack(UINT16 color, UINT8 strength)
 {
 	FOutVector  v[4];
 	FSurfaceInfo Surf;
+	FBITFIELD poly_flags = PF_NoTexture|PF_Modulated|PF_NoDepthTest;
 
 	v[0].x = v[3].x = -1.0f;
 	v[2].x = v[1].x =  1.0f;
@@ -719,17 +721,59 @@ void HWR_FadeScreenMenuBack(UINT16 color, UINT8 strength)
 	v[0].t = v[1].t = 1.0f;
 	v[2].t = v[3].t = 0.0f;
 
-	if (color & 0xFF00) // Do COLORMAP fade.
+	if (color & 0xFF00) // Special fade options
 	{
-		Surf.PolyColor.rgba = UINT2RGBA(0x01010160);
-		Surf.PolyColor.s.alpha = (strength*8);
+		UINT16 option = color & 0x0F00;
+		if (option == 0x0A00 || option == 0x0B00) // Tinted fades
+		{
+			INT32 r, g, b;
+			int fade = strength * 8;
+
+			r = FADEREDFACTOR*fade/10;
+			g = FADEGREENFACTOR*fade/10;
+			b = FADEBLUEFACTOR*fade/10;
+
+			Surf.PolyColor.s.red = min(r, 255);
+			Surf.PolyColor.s.green = min(g, 255);
+			Surf.PolyColor.s.blue = min(b, 255);
+			Surf.PolyColor.s.alpha = 255;
+
+			if (option == 0x0A00) // Tinted subtractive fade
+				poly_flags |= PF_ReverseSubtract;
+			else if (option == 0x0B00) // Tinted additive fade
+				poly_flags |= PF_Additive;
+		}
+		else // COLORMAP fade
+		{
+			if (HWR_ShouldUsePaletteRendering())
+			{
+				const hwdscreentexture_t scr_tex = HWD_SCREENTEXTURE_GENERIC2;
+
+				Surf.LightTableId = HWR_GetLightTableID(NULL);
+				Surf.LightInfo.light_level = strength;
+				HWD.pfnMakeScreenTexture(scr_tex);
+				HWD.pfnSetShader(HWR_GetShaderFromTarget(SHADER_UI_COLORMAP_FADE));
+				HWD.pfnDrawScreenTexture(scr_tex, &Surf, PF_ColorMapped|PF_NoDepthTest);
+				HWD.pfnUnSetShader();
+
+				return;
+			}
+			else
+			{
+				Surf.PolyColor.rgba = UINT2RGBA(0x01010160);
+				Surf.PolyColor.s.alpha = (strength*8);
+				poly_flags |= PF_Translucent;
+			}
+		}
 	}
 	else // Do TRANSMAP** fade.
 	{
-		Surf.PolyColor.rgba = V_GetColor(color).rgba;
+		RGBA_t *palette = HWR_GetTexturePalette();
+		Surf.PolyColor.rgba = palette[color&0xFF].rgba;
 		Surf.PolyColor.s.alpha = softwaretranstogl[strength];
+		poly_flags |= PF_Translucent;
 	}
-	HWD.pfnDrawPolygon(&Surf, v, 4, PF_NoTexture|PF_Modulated|PF_Translucent|PF_NoDepthTest);
+	HWD.pfnDrawPolygon(&Surf, v, 4, poly_flags);
 }
 
 // -----------------+
@@ -897,7 +941,8 @@ void HWR_DrawFadeFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 color, UINT16 ac
 	}
 	else // Do TRANSMAP** fade.
 	{
-		Surf.PolyColor.rgba = V_GetColor(actualcolor).rgba;
+		RGBA_t *palette = HWR_GetTexturePalette();
+		Surf.PolyColor.rgba = palette[actualcolor&0xFF].rgba;
 		Surf.PolyColor.s.alpha = softwaretranstogl[strength];
 	}
 	HWD.pfnDrawPolygon(&Surf, v, 4, PF_NoTexture|PF_Modulated|PF_Translucent|PF_NoDepthTest);
@@ -1102,8 +1147,9 @@ void HWR_drawAMline(const fline_t *fl, INT32 color)
 {
 	F2DCoord v1, v2;
 	RGBA_t color_rgba;
+	RGBA_t *palette = HWR_GetTexturePalette();
 
-	color_rgba = V_GetColor(color);
+	color_rgba = palette[color&0xFF];
 
 	v1.x = ((float)fl->a.x-(vid.width/2.0f))*(2.0f/vid.width);
 	v1.y = ((float)fl->a.y-(vid.height/2.0f))*(2.0f/vid.height);
@@ -1288,6 +1334,7 @@ void HWR_DrawFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 color)
 	FOutVector v[4];
 	FSurfaceInfo Surf;
 	float fx, fy, fw, fh;
+	RGBA_t *palette = HWR_GetTexturePalette();
 	UINT8 alphalevel = ((color & V_ALPHAMASK) >> V_ALPHASHIFT);
 
 	UINT8 perplayershuffle = 0;
@@ -1374,7 +1421,7 @@ void HWR_DrawFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 color)
 	{
 		if (x == 0 && y == 0 && w == BASEVIDWIDTH && h == BASEVIDHEIGHT)
 		{
-			RGBA_t rgbaColour = V_GetColor(color);
+			RGBA_t rgbaColour = palette[color&0xFF];
 			FRGBAFloat clearColour;
 			clearColour.red = (float)rgbaColour.s.red / 255;
 			clearColour.green = (float)rgbaColour.s.green / 255;
@@ -1451,7 +1498,7 @@ void HWR_DrawFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 color)
 	v[0].t = v[1].t = 0.0f;
 	v[2].t = v[3].t = 1.0f;
 
-	Surf.PolyColor = V_GetColor(color);
+	Surf.PolyColor = palette[color&0xFF];
 
 	if (alphalevel)
 	{
@@ -1539,11 +1586,12 @@ static inline boolean saveTGA(const char *file_name, void *buffer,
 UINT8 *HWR_GetScreenshot(void)
 {
 	UINT8 *buf = malloc(vid.width * vid.height * 3 * sizeof (*buf));
+	int tex = HWR_ShouldUsePaletteRendering() ? HWD_SCREENTEXTURE_GENERIC3 : HWD_SCREENTEXTURE_GENERIC2;
 
 	if (!buf)
 		return NULL;
 	// returns 24bit 888 RGB
-	HWD.pfnReadRect(0, 0, vid.width, vid.height, vid.width * 3, (void *)buf);
+	HWD.pfnReadScreenTexture(tex, (void *)buf);
 	return buf;
 }
 
@@ -1551,6 +1599,7 @@ boolean HWR_Screenshot(const char *pathname)
 {
 	boolean ret;
 	UINT8 *buf = malloc(vid.width * vid.height * 3 * sizeof (*buf));
+	int tex = HWR_ShouldUsePaletteRendering() ? HWD_SCREENTEXTURE_GENERIC3 : HWD_SCREENTEXTURE_GENERIC2;
 
 	if (!buf)
 	{
@@ -1559,7 +1608,7 @@ boolean HWR_Screenshot(const char *pathname)
 	}
 
 	// returns 24bit 888 RGB
-	HWD.pfnReadRect(0, 0, vid.width, vid.height, vid.width * 3, (void *)buf);
+	HWD.pfnReadScreenTexture(tex, (void *)buf);
 
 #ifdef USE_PNG
 	ret = M_SavePNG(pathname, buf, vid.width, vid.height, NULL);
diff --git a/src/hardware/hw_drv.h b/src/hardware/hw_drv.h
index 1c4cd99ab03d34498fa13d132a01ef53af9e6e61..ba0258c120de3bd388513c827d03ecaa8689424e 100644
--- a/src/hardware/hw_drv.h
+++ b/src/hardware/hw_drv.h
@@ -32,7 +32,7 @@ EXPORT void HWRAPI(Shutdown) (void);
 #ifdef _WINDOWS
 EXPORT void HWRAPI(GetModeList) (vmode_t **pvidmodes, INT32 *numvidmodes);
 #endif
-EXPORT void HWRAPI(SetPalette) (RGBA_t *ppal);
+EXPORT void HWRAPI(SetTexturePalette) (RGBA_t *ppal);
 EXPORT void HWRAPI(FinishUpdate) (INT32 waitvbl);
 EXPORT void HWRAPI(Draw2DLine) (F2DCoord *v1, F2DCoord *v2, RGBA_t Color);
 EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags);
@@ -43,11 +43,10 @@ EXPORT void HWRAPI(ClearBuffer) (FBOOLEAN ColorMask, FBOOLEAN DepthMask, FRGBAFl
 EXPORT void HWRAPI(SetTexture) (GLMipmap_t *TexInfo);
 EXPORT void HWRAPI(UpdateTexture) (GLMipmap_t *TexInfo);
 EXPORT void HWRAPI(DeleteTexture) (GLMipmap_t *TexInfo);
-EXPORT void HWRAPI(ReadRect) (INT32 x, INT32 y, INT32 width, INT32 height, INT32 dst_stride, UINT16 *dst_data);
+EXPORT void HWRAPI(ReadScreenTexture) (int tex, UINT8 *dst_data);
 EXPORT void HWRAPI(GClipRect) (INT32 minx, INT32 miny, INT32 maxx, INT32 maxy, float nearclip);
 EXPORT void HWRAPI(ClearMipMapCache) (void);
 
-//Hurdler: added for backward compatibility
 EXPORT void HWRAPI(SetSpecialState) (hwdspecialstate_t IdState, INT32 Value);
 
 //Hurdler: added for new development
@@ -57,24 +56,26 @@ EXPORT void HWRAPI(SetTransform) (FTransform *ptransform);
 EXPORT INT32 HWRAPI(GetTextureUsed) (void);
 
 EXPORT void HWRAPI(FlushScreenTextures) (void);
-EXPORT void HWRAPI(StartScreenWipe) (void);
-EXPORT void HWRAPI(EndScreenWipe) (void);
-EXPORT void HWRAPI(DoScreenWipe) (void);
-EXPORT void HWRAPI(DrawIntermissionBG) (void);
-EXPORT void HWRAPI(MakeScreenTexture) (void);
-EXPORT void HWRAPI(MakeScreenFinalTexture) (void);
-EXPORT void HWRAPI(DrawScreenFinalTexture) (int width, int height);
+EXPORT void HWRAPI(DoScreenWipe) (int wipeStart, int wipeEnd, FSurfaceInfo *surf, FBITFIELD polyFlags);
+EXPORT void HWRAPI(DrawScreenTexture) (int tex, FSurfaceInfo *surf, FBITFIELD polyflags);
+EXPORT void HWRAPI(MakeScreenTexture) (int tex);
+EXPORT void HWRAPI(DrawScreenFinalTexture) (int tex, int width, int height);
 
 #define SCREENVERTS 10
 EXPORT void HWRAPI(PostImgRedraw) (float points[SCREENVERTS][SCREENVERTS][2]);
 
-EXPORT boolean HWRAPI(CompileShaders) (void);
-EXPORT void HWRAPI(CleanShaders) (void);
-EXPORT void HWRAPI(SetShader) (int type);
+EXPORT boolean HWRAPI(InitShaders) (void);
+EXPORT void HWRAPI(LoadShader) (int slot, char *code, hwdshaderstage_t stage);
+EXPORT boolean HWRAPI(CompileShader) (int slot);
+EXPORT void HWRAPI(SetShader) (int slot);
 EXPORT void HWRAPI(UnSetShader) (void);
 
 EXPORT void HWRAPI(SetShaderInfo) (hwdshaderinfo_t info, INT32 value);
-EXPORT void HWRAPI(LoadCustomShader) (int number, char *code, size_t size, boolean isfragment);
+
+EXPORT void HWRAPI(SetPaletteLookup)(UINT8 *lut);
+EXPORT UINT32 HWRAPI(CreateLightTable)(RGBA_t *hw_lighttable);
+EXPORT void HWRAPI(ClearLightTables)(void);
+EXPORT void HWRAPI(SetScreenPalette)(RGBA_t *palette);
 
 // ==========================================================================
 //                                      HWR DRIVER OBJECT, FOR CLIENT PROGRAM
@@ -85,7 +86,7 @@ EXPORT void HWRAPI(LoadCustomShader) (int number, char *code, size_t size, boole
 struct hwdriver_s
 {
 	Init                pfnInit;
-	SetPalette          pfnSetPalette;
+	SetTexturePalette   pfnSetTexturePalette;
 	FinishUpdate        pfnFinishUpdate;
 	Draw2DLine          pfnDraw2DLine;
 	DrawPolygon         pfnDrawPolygon;
@@ -96,10 +97,10 @@ struct hwdriver_s
 	SetTexture          pfnSetTexture;
 	UpdateTexture       pfnUpdateTexture;
 	DeleteTexture       pfnDeleteTexture;
-	ReadRect            pfnReadRect;
+	ReadScreenTexture   pfnReadScreenTexture;
 	GClipRect           pfnGClipRect;
 	ClearMipMapCache    pfnClearMipMapCache;
-	SetSpecialState     pfnSetSpecialState;//Hurdler: added for backward compatibility
+	SetSpecialState     pfnSetSpecialState;
 	DrawModel           pfnDrawModel;
 	CreateModelVBOs     pfnCreateModelVBOs;
 	SetTransform        pfnSetTransform;
@@ -112,21 +113,23 @@ struct hwdriver_s
 #endif
 	PostImgRedraw       pfnPostImgRedraw;
 	FlushScreenTextures pfnFlushScreenTextures;
-	StartScreenWipe     pfnStartScreenWipe;
-	EndScreenWipe       pfnEndScreenWipe;
 	DoScreenWipe        pfnDoScreenWipe;
-	DrawIntermissionBG  pfnDrawIntermissionBG;
+	DrawScreenTexture   pfnDrawScreenTexture;
 	MakeScreenTexture   pfnMakeScreenTexture;
-	MakeScreenFinalTexture  pfnMakeScreenFinalTexture;
 	DrawScreenFinalTexture  pfnDrawScreenFinalTexture;
 
-	CompileShaders      pfnCompileShaders;
-	CleanShaders        pfnCleanShaders;
+	InitShaders         pfnInitShaders;
+	LoadShader          pfnLoadShader;
+	CompileShader       pfnCompileShader;
 	SetShader           pfnSetShader;
 	UnSetShader         pfnUnSetShader;
 
 	SetShaderInfo       pfnSetShaderInfo;
-	LoadCustomShader    pfnLoadCustomShader;
+
+	SetPaletteLookup    pfnSetPaletteLookup;
+	CreateLightTable    pfnCreateLightTable;
+	ClearLightTables    pfnClearLightTables;
+	SetScreenPalette    pfnSetScreenPalette;
 };
 
 extern struct hwdriver_s hwdriver;
diff --git a/src/hardware/hw_glob.h b/src/hardware/hw_glob.h
index fbb02f46322c614107fc5523884e7081064c5c4a..094d356d530a24f010896141a8f6b8bc6b0777ae 100644
--- a/src/hardware/hw_glob.h
+++ b/src/hardware/hw_glob.h
@@ -107,6 +107,8 @@ void HWR_FreeExtraSubsectors(void);
 // --------
 // hw_cache.c
 // --------
+RGBA_t *HWR_GetTexturePalette(void);
+
 void HWR_InitMapTextures(void);
 void HWR_LoadMapTextures(size_t pnumtextures);
 void HWR_FreeMapTextures(void);
@@ -131,6 +133,10 @@ void HWR_FreeColormapCache(void);
 void HWR_UnlockCachedPatch(GLPatch_t *gpatch);
 
 void HWR_SetPalette(RGBA_t *palette);
+void HWR_SetMapPalette(void);
+UINT32 HWR_CreateLightTable(UINT8 *lighttable);
+UINT32 HWR_GetLightTableID(extracolormap_t *colormap);
+void HWR_ClearLightTables(void);
 
 
 // --------
@@ -139,4 +145,18 @@ void HWR_SetPalette(RGBA_t *palette);
 extern INT32 patchformat;
 extern INT32 textureformat;
 
+// --------
+// hw_shaders.c
+// --------
+boolean HWR_InitShaders(void);
+void HWR_CompileShaders(void);
+
+int HWR_GetShaderFromTarget(int shader_target);
+
+void HWR_LoadAllCustomShaders(void);
+void HWR_LoadCustomShadersFromFile(UINT16 wadnum, boolean PK3);
+const char *HWR_GetShaderName(INT32 shader);
+
+extern customshaderxlat_t shaderxlat[];
+
 #endif //_HW_GLOB_
diff --git a/src/hardware/hw_main.c b/src/hardware/hw_main.c
index 9c1a95c9316626ce5f02747eaba356b60368ee62..1cfffca541d048fece90d1b1970b5b2f62ab69bc 100644
--- a/src/hardware/hw_main.c
+++ b/src/hardware/hw_main.c
@@ -130,26 +130,6 @@ static line_t *gl_linedef;
 static sector_t *gl_frontsector;
 static sector_t *gl_backsector;
 
-// --------------------------------------------------------------------------
-//                                              STUFF FOR THE PROJECTION CODE
-// --------------------------------------------------------------------------
-
-FTransform atransform;
-// duplicates of the main code, set after R_SetupFrame() passed them into sharedstruct,
-// copied here for local use
-static fixed_t dup_viewx, dup_viewy, dup_viewz;
-static angle_t dup_viewangle;
-
-static float gl_viewx, gl_viewy, gl_viewz;
-float gl_viewsin, gl_viewcos;
-
-// Maybe not necessary with the new T&L code (needs to be checked!)
-static float gl_viewludsin, gl_viewludcos; // look up down kik test
-static float gl_fovlud;
-
-static angle_t gl_aimingangle;
-static void HWR_SetTransformAiming(FTransform *trans, player_t *player, boolean skybox);
-
 // Render stats
 ps_metric_t ps_hw_skyboxtime = {0};
 ps_metric_t ps_hw_nodesorttime = {0};
@@ -170,13 +150,39 @@ ps_metric_t ps_hw_batchdrawtime = {0};
 
 boolean gl_init = false;
 boolean gl_maploaded = false;
-boolean gl_shadersavailable = true;
+boolean gl_sessioncommandsadded = false;
+// false if shaders have not been initialized yet, or if shaders are not available
+boolean gl_shadersavailable = false;
+
+// Whether the internal state is set to palette rendering or not.
+static boolean gl_palette_rendering_state = false;
+
+// --------------------------------------------------------------------------
+//                                              STUFF FOR THE PROJECTION CODE
+// --------------------------------------------------------------------------
+
+FTransform atransform;
+// duplicates of the main code, set after R_SetupFrame() passed them into sharedstruct,
+// copied here for local use
+static fixed_t dup_viewx, dup_viewy, dup_viewz;
+static angle_t dup_viewangle;
+
+static float gl_viewx, gl_viewy, gl_viewz;
+float gl_viewsin, gl_viewcos;
+
+// Maybe not necessary with the new T&L code (needs to be checked!)
+static float gl_viewludsin, gl_viewludcos; // look up down kik test
+static float gl_fovlud;
+
+static angle_t gl_aimingangle;
+static void HWR_SetTransformAiming(FTransform *trans, player_t *player, boolean skybox);
 
 // ==========================================================================
 // Lighting
 // ==========================================================================
 
-static boolean HWR_UseShader(void)
+// Returns true if the gr_shaders cvar is non-zero and gl_shadersavailable is set.
+boolean HWR_UseShader(void)
 {
 	return (cv_glshaders.value && gl_shadersavailable);
 }
@@ -242,6 +248,11 @@ void HWR_Lighting(FSurfaceInfo *Surface, INT32 light_level, extracolormap_t *col
 	Surface->LightInfo.light_level = light_level;
 	Surface->LightInfo.fade_start = (colormap != NULL) ? colormap->fadestart : 0;
 	Surface->LightInfo.fade_end = (colormap != NULL) ? colormap->fadeend : 31;
+
+	if (HWR_ShouldUsePaletteRendering())
+		Surface->LightTableId = HWR_GetLightTableID(colormap);
+	else
+		Surface->LightTableId = 0;
 }
 
 UINT8 HWR_FogBlockAlpha(INT32 light, extracolormap_t *colormap) // Let's see if this can work
@@ -372,7 +383,7 @@ static void HWR_RenderPlane(subsector_t *subsector, extrasubsector_t *xsub, bool
 	FOutVector *v3d;
 	polyvertex_t *pv;
 	pslope_t *slope = NULL;
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 
 	size_t nrPlaneVerts;
 	INT32 i;
@@ -759,7 +770,7 @@ static void HWR_AddTransparentWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, I
 //
 static void HWR_ProjectWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, FBITFIELD blendmode, INT32 lightlevel, extracolormap_t *wallcolormap)
 {
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 
 	HWR_Lighting(pSurf, lightlevel, wallcolormap);
 
@@ -872,13 +883,15 @@ static void HWR_SplitWall(sector_t *sector, FOutVector *wallVerts, INT32 texnum,
 		{
 			if (pfloor && (pfloor->fofflags & FOF_FOG))
 			{
-				lightnum = HWR_CalcWallLight(pfloor->master->frontsector->lightlevel, v1x, v1y, v2x, v2y);
+				lightnum = pfloor->master->frontsector->lightlevel;
 				colormap = pfloor->master->frontsector->extra_colormap;
+				lightnum = colormap ? lightnum : HWR_CalcWallLight(lightnum, v1x, v1y, v2x, v2y);
 			}
 			else
 			{
-				lightnum = HWR_CalcWallLight(*list[i].lightlevel, v1x, v1y, v2x, v2y);
+				lightnum = *list[i].lightlevel;
 				colormap = *list[i].extra_colormap;
+				lightnum = colormap ? lightnum : HWR_CalcWallLight(lightnum, v1x, v1y, v2x, v2y);
 			}
 		}
 
@@ -1109,8 +1122,9 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom
 	float cliplow = (float)texturehpeg;
 	float cliphigh = (float)(texturehpeg + (gl_curline->flength*FRACUNIT));
 
-	FUINT lightnum = HWR_CalcWallLight(gl_frontsector->lightlevel, vs.x, vs.y, ve.x, ve.y);
+	FUINT lightnum = gl_frontsector->lightlevel;
 	extracolormap_t *colormap = gl_frontsector->extra_colormap;
+	lightnum = colormap ? lightnum : HWR_CalcWallLight(lightnum, vs.x, vs.y, ve.x, ve.y);
 
 	FSurfaceInfo Surf;
 	Surf.PolyColor.s.alpha = 255;
@@ -1705,8 +1719,9 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom
 
 					blendmode = PF_Fog|PF_NoTexture;
 
-					lightnum = HWR_CalcWallLight(rover->master->frontsector->lightlevel, vs.x, vs.y, ve.x, ve.y);
+					lightnum = rover->master->frontsector->lightlevel;
 					colormap = rover->master->frontsector->extra_colormap;
+					lightnum = colormap ? lightnum : HWR_CalcWallLight(lightnum, vs.x, vs.y, ve.x, ve.y);
 
 					Surf.PolyColor.s.alpha = HWR_FogBlockAlpha(rover->master->frontsector->lightlevel, rover->master->frontsector->extra_colormap);
 
@@ -1827,8 +1842,9 @@ static void HWR_ProcessSeg(void) // Sort of like GLWall::Process in GZDoom
 
 					blendmode = PF_Fog|PF_NoTexture;
 
-					lightnum = HWR_CalcWallLight(rover->master->frontsector->lightlevel, vs.x, vs.y, ve.x, ve.y);
+					lightnum = rover->master->frontsector->lightlevel;
 					colormap = rover->master->frontsector->extra_colormap;
+					lightnum = colormap ? lightnum : HWR_CalcWallLight(lightnum, vs.x, vs.y, ve.x, ve.y);
 
 					Surf.PolyColor.s.alpha = HWR_FogBlockAlpha(rover->master->frontsector->lightlevel, rover->master->frontsector->extra_colormap);
 
@@ -2694,7 +2710,7 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling,
 {
 	FSurfaceInfo Surf;
 	FOutVector *v3d;
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 
 	size_t nrPlaneVerts = polysector->numVertices;
 	INT32 i;
@@ -3527,7 +3543,7 @@ static void HWR_DrawDropShadow(mobj_t *thing, fixed_t scale)
 	float fscale; float fx; float fy; float offset;
 	extracolormap_t *colormap = NULL;
 	FBITFIELD blendmode = PF_Translucent|PF_Modulated;
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 	UINT8 i;
 	INT32 heightsec, phs;
 	SINT8 flip = P_MobjFlip(thing);
@@ -3735,7 +3751,7 @@ static void HWR_SplitSprite(gl_vissprite_t *spr)
 	boolean lightset = true;
 	FBITFIELD blend = 0;
 	FBITFIELD occlusion;
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 	boolean use_linkdraw_hack = false;
 	UINT8 alpha;
 
@@ -4306,7 +4322,7 @@ static void HWR_DrawSprite(gl_vissprite_t *spr)
 	}
 
 	{
-		INT32 shader = SHADER_DEFAULT;
+		INT32 shader = SHADER_NONE;
 		FBITFIELD blend = 0;
 		FBITFIELD occlusion;
 		boolean use_linkdraw_hack = false;
@@ -4380,7 +4396,7 @@ static void HWR_DrawSprite(gl_vissprite_t *spr)
 // Sprite drawer for precipitation
 static inline void HWR_DrawPrecipitationSprite(gl_vissprite_t *spr)
 {
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 	FBITFIELD blend = 0;
 	FOutVector wallVerts[4];
 	patch_t *gpatch;
@@ -4825,7 +4841,6 @@ static void HWR_CreateDrawNodes(void)
 
 	// Okay! Let's draw it all! Woo!
 	HWD.pfnSetTransform(&atransform);
-	HWD.pfnSetShader(SHADER_DEFAULT);
 
 	for (i = 0; i < p; i++)
 	{
@@ -5884,7 +5899,8 @@ static void HWR_DrawSkyBackground(player_t *player)
 			HWR_BuildSkyDome();
 		}
 
-		HWD.pfnSetShader(SHADER_SKY); // sky shader
+		if (HWR_UseShader())
+			HWD.pfnSetShader(HWR_GetShaderFromTarget(SHADER_SKY));
 		HWD.pfnSetTransform(&dometransform);
 		HWD.pfnRenderSkyDome(&gl_sky);
 	}
@@ -5970,8 +5986,6 @@ static void HWR_DrawSkyBackground(player_t *player)
 		HWD.pfnUnSetShader();
 		HWD.pfnDrawPolygon(NULL, v, 4, 0);
 	}
-
-	HWD.pfnSetShader(SHADER_DEFAULT);
 }
 
 
@@ -6061,13 +6075,7 @@ static void HWR_SetTransformAiming(FTransform *trans, player_t *player, boolean
 //
 static void HWR_SetShaderState(void)
 {
-	hwdshaderoption_t state = cv_glshaders.value;
-
-	if (!cv_glallowshaders.value)
-		state = (cv_glshaders.value == HWD_SHADEROPTION_ON ? HWD_SHADEROPTION_NOCUSTOM : cv_glshaders.value);
-
-	HWD.pfnSetSpecialState(HWD_SET_SHADERS, (INT32)state);
-	HWD.pfnSetShader(SHADER_DEFAULT);
+	HWD.pfnSetSpecialState(HWD_SET_SHADERS, (INT32)HWR_UseShader()); // pass only whether shaders are usable (cvar non-zero and available)
 }
 
 // ==========================================================================
@@ -6083,6 +6091,7 @@ void HWR_RenderSkyboxView(INT32 viewnumber, player_t *player)
 	else
 		type = &postimgtype;
 
+	if (!HWR_ShouldUsePaletteRendering())
 	{
 		// do we really need to save player (is it not the same)?
 		player_t *saved_player = stplyr;
@@ -6275,6 +6284,7 @@ void HWR_RenderPlayerView(INT32 viewnumber, player_t *player)
 		HWR_RenderSkyboxView(viewnumber, player); // This is drawn before everything else so it is placed behind
 	PS_STOP_TIMING(ps_hw_skyboxtime);
 
+	if (!HWR_ShouldUsePaletteRendering())
 	{
 		// do we really need to save player (is it not the same)?
 		player_t *saved_player = stplyr;
@@ -6451,6 +6461,56 @@ void HWR_RenderPlayerView(INT32 viewnumber, player_t *player)
 	HWD.pfnGClipRect(0, 0, vid.width, vid.height, NZCLIP_PLANE);
 }
 
+// Returns true only when the gr_paletterendering cvar is set AND HWR_UseShader() reports
+// that shaders can be used; palette rendering is never active while shaders are disabled.
+boolean HWR_ShouldUsePaletteRendering(void)
+{
+	return (cv_glpaletterendering.value && HWR_UseShader());
+}
+
+// Brings gl_palette_rendering_state in line with HWR_ShouldUsePaletteRendering().
+// Does nothing if the tracked state already matches; called when relevant settings change.
+// Shader recompilation is not done here; it is done in the cvar callback.
+static void HWR_TogglePaletteRendering(void)
+{
+	// Target state: palette rendering on.
+	if (HWR_ShouldUsePaletteRendering())
+	{
+		// Only act on an actual off -> on transition.
+		if (!gl_palette_rendering_state)
+		{
+			gl_palette_rendering_state = true;
+
+			// The textures will still be converted to RGBA by r_opengl.
+			// This however makes hw_cache use paletted blending for composite textures!
+			// (patchformat is not touched)
+			textureformat = GL_TEXFMT_P_8;
+
+			HWR_SetMapPalette();
+			HWR_SetPalette(pLocalPalette);
+
+			// If the r_opengl "texture palette" stays the same during this switch, these textures
+			// will not be cleared out. However they are still out of date since the
+			// composite texture blending method has changed. Therefore they need to be cleared.
+			HWR_LoadMapTextures(numtextures);
+		}
+	}
+	else
+	{
+		// Only act on an actual on -> off transition.
+		if (gl_palette_rendering_state)
+		{
+			gl_palette_rendering_state = false;
+			textureformat = GL_TEXFMT_RGBA; // same format HWR_Startup assigns to textureformat
+			HWR_SetPalette(pLocalPalette);
+			// If the r_opengl "texture palette" stays the same during this switch, these textures
+			// will not be cleared out. However they are still out of date since the
+			// composite texture blending method has changed. Therefore they need to be cleared.
+			HWR_LoadMapTextures(numtextures);
+		}
+	}
+}
+
 void HWR_LoadLevel(void)
 {
 #ifdef ALAM_LIGHTING
@@ -6464,6 +6524,9 @@ void HWR_LoadLevel(void)
 	HWR_ClearSkyDome();
 	HWR_BuildSkyDome();
 
+	if (HWR_ShouldUsePaletteRendering())
+		HWR_SetMapPalette();
+
 	gl_maploaded = true;
 }
 
@@ -6471,13 +6534,17 @@ void HWR_LoadLevel(void)
 //                                                         3D ENGINE COMMANDS
 // ==========================================================================
 
-static CV_PossibleValue_t glshaders_cons_t[] = {{HWD_SHADEROPTION_OFF, "Off"}, {HWD_SHADEROPTION_ON, "On"}, {HWD_SHADEROPTION_NOCUSTOM, "Ignore custom shaders"}, {0, NULL}};
+static CV_PossibleValue_t glshaders_cons_t[] = {{0, "Off"}, {1, "On"}, {2, "Ignore custom shaders"}, {0, NULL}};
 static CV_PossibleValue_t glmodelinterpolation_cons_t[] = {{0, "Off"}, {1, "Sometimes"}, {2, "Always"}, {0, NULL}};
 static CV_PossibleValue_t glfakecontrast_cons_t[] = {{0, "Off"}, {1, "On"}, {2, "Smooth"}, {0, NULL}};
 static CV_PossibleValue_t glshearing_cons_t[] = {{0, "Off"}, {1, "On"}, {2, "Third-person"}, {0, NULL}};
 
 static void CV_glfiltermode_OnChange(void);
 static void CV_glanisotropic_OnChange(void);
+static void CV_glmodellighting_OnChange(void);
+static void CV_glpaletterendering_OnChange(void);
+static void CV_glpalettedepth_OnChange(void);
+static void CV_glshaders_OnChange(void);
 
 static CV_PossibleValue_t glfiltermode_cons_t[]= {{HWD_SET_TEXTUREFILTER_POINTSAMPLED, "Nearest"},
 	{HWD_SET_TEXTUREFILTER_BILINEAR, "Bilinear"}, {HWD_SET_TEXTUREFILTER_TRILINEAR, "Trilinear"},
@@ -6487,7 +6554,7 @@ static CV_PossibleValue_t glfiltermode_cons_t[]= {{HWD_SET_TEXTUREFILTER_POINTSA
 	{0, NULL}};
 CV_PossibleValue_t glanisotropicmode_cons_t[] = {{1, "MIN"}, {16, "MAX"}, {0, NULL}};
 
-consvar_t cv_glshaders = CVAR_INIT ("gr_shaders", "On", CV_SAVE, glshaders_cons_t, NULL);
+consvar_t cv_glshaders = CVAR_INIT ("gr_shaders", "On", CV_SAVE|CV_CALL, glshaders_cons_t, CV_glshaders_OnChange);
 
 #ifdef ALAM_LIGHTING
 consvar_t cv_gldynamiclighting = CVAR_INIT ("gr_dynamiclighting", "On", CV_SAVE, CV_OnOff, NULL);
@@ -6498,7 +6565,7 @@ consvar_t cv_glcoronasize = CVAR_INIT ("gr_coronasize", "1", CV_SAVE|CV_FLOAT, 0
 
 consvar_t cv_glmodels = CVAR_INIT ("gr_models", "Off", CV_SAVE, CV_OnOff, NULL);
 consvar_t cv_glmodelinterpolation = CVAR_INIT ("gr_modelinterpolation", "Sometimes", CV_SAVE, glmodelinterpolation_cons_t, NULL);
-consvar_t cv_glmodellighting = CVAR_INIT ("gr_modellighting", "Off", CV_SAVE, CV_OnOff, NULL);
+consvar_t cv_glmodellighting = CVAR_INIT ("gr_modellighting", "Off", CV_SAVE|CV_CALL, CV_OnOff, CV_glmodellighting_OnChange);
 
 consvar_t cv_glshearing = CVAR_INIT ("gr_shearing", "Off", CV_SAVE, glshearing_cons_t, NULL);
 consvar_t cv_glspritebillboarding = CVAR_INIT ("gr_spritebillboarding", "Off", CV_SAVE, CV_OnOff, NULL);
@@ -6513,18 +6580,61 @@ consvar_t cv_glsolvetjoin = CVAR_INIT ("gr_solvetjoin", "On", 0, CV_OnOff, NULL)
 
 consvar_t cv_glbatching = CVAR_INIT ("gr_batching", "On", 0, CV_OnOff, NULL);
 
+static CV_PossibleValue_t glpalettedepth_cons_t[] = {{16, "16 bits"}, {24, "24 bits"}, {0, NULL}};
+
+consvar_t cv_glpaletterendering = CVAR_INIT ("gr_paletterendering", "Off", CV_SAVE|CV_CALL, CV_OnOff, CV_glpaletterendering_OnChange);
+consvar_t cv_glpalettedepth = CVAR_INIT ("gr_palettedepth", "16 bits", CV_SAVE|CV_CALL, glpalettedepth_cons_t, CV_glpalettedepth_OnChange);
+
+#define ONLY_IF_GL_LOADED if (vid.glstate != VID_GL_LIBRARY_LOADED) return;
 consvar_t cv_glwireframe = CVAR_INIT ("gr_wireframe", "Off", 0, CV_OnOff, NULL);
 
 static void CV_glfiltermode_OnChange(void)
 {
-	if (rendermode == render_opengl)
-		HWD.pfnSetSpecialState(HWD_SET_TEXTUREFILTERMODE, cv_glfiltermode.value);
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	HWD.pfnSetSpecialState(HWD_SET_TEXTUREFILTERMODE, cv_glfiltermode.value);
 }
 
 static void CV_glanisotropic_OnChange(void)
 {
-	if (rendermode == render_opengl)
-		HWD.pfnSetSpecialState(HWD_SET_TEXTUREANISOTROPICMODE, cv_glanisotropicmode.value);
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	HWD.pfnSetSpecialState(HWD_SET_TEXTUREANISOTROPICMODE, cv_glanisotropicmode.value);
+}
+
+static void CV_glmodellighting_OnChange(void) // change callback registered on cv_glmodellighting (gr_modellighting)
+{
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	// if shaders have been compiled, then they now need to be recompiled.
+	if (gl_shadersavailable)
+		HWR_CompileShaders();
+}
+
+static void CV_glpaletterendering_OnChange(void) // change callback registered on cv_glpaletterendering (gr_paletterendering)
+{
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	if (gl_shadersavailable)
+	{
+		HWR_CompileShaders(); // recompile here; HWR_TogglePaletteRendering leaves recompilation to this callback
+		HWR_TogglePaletteRendering();
+	}
+}
+
+static void CV_glpalettedepth_OnChange(void) // change callback registered on cv_glpalettedepth (gr_palettedepth)
+{
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	// refresh the screen palette, but only while palette rendering is actually in use
+	if (HWR_ShouldUsePaletteRendering())
+		HWR_SetPalette(pLocalPalette);
+}
+
+static void CV_glshaders_OnChange(void) // change callback registered on cv_glshaders (gr_shaders)
+{
+	ONLY_IF_GL_LOADED // returns early unless vid.glstate == VID_GL_LIBRARY_LOADED
+	HWR_SetShaderState(); // forwards the new HWR_UseShader() result to the backend
+	if (cv_glpaletterendering.value)
+	{
+		// can't do palette rendering without shaders, so update the state if needed
+		HWR_TogglePaletteRendering();
+	}
+}
 
 //added by Hurdler: console varibale that are saved
@@ -6553,6 +6663,8 @@ void HWR_AddCommands(void)
 
 	CV_RegisterVar(&cv_glbatching);
 
+	CV_RegisterVar(&cv_glpaletterendering);
+	CV_RegisterVar(&cv_glpalettedepth);
 	CV_RegisterVar(&cv_glwireframe);
 
 #ifndef NEWCLIP
@@ -6569,6 +6681,8 @@ void HWR_Startup(void)
 	{
 		CONS_Printf("HWR_Startup()...\n");
 
+		textureformat = patchformat = GL_TEXFMT_RGBA;
+
 		HWR_InitPolyPool();
 		HWR_InitMapTextures();
 		HWR_InitModels();
@@ -6576,14 +6690,12 @@ void HWR_Startup(void)
 		HWR_InitLight();
 #endif
 
+		gl_shadersavailable = HWR_InitShaders();
+		HWR_SetShaderState();
 		HWR_LoadAllCustomShaders();
-		if (!HWR_CompileShaders())
-			gl_shadersavailable = false;
+		HWR_TogglePaletteRendering();
 	}
 
-	if (rendermode == render_opengl)
-		textureformat = patchformat = GL_TEXFMT_RGBA;
-
 	gl_init = true;
 }
 
@@ -6673,7 +6785,7 @@ void HWR_RenderWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, FBITFIELD blend,
 	FBITFIELD blendmode = blend;
 	UINT8 alpha = pSurf->PolyColor.s.alpha; // retain the alpha
 
-	INT32 shader = SHADER_DEFAULT;
+	INT32 shader = SHADER_NONE;
 
 	// Lighting is done here instead so that fog isn't drawn incorrectly on transparent walls after sorting
 	HWR_Lighting(pSurf, lightlevel, wallcolormap);
@@ -6718,7 +6830,7 @@ void HWR_DoPostProcessor(player_t *player)
 
 	// Armageddon Blast Flash!
 	// Could this even be considered postprocessor?
-	if (player->flashcount)
+	if (player->flashcount && !HWR_ShouldUsePaletteRendering())
 	{
 		FOutVector      v[4];
 		FSurfaceInfo Surf;
@@ -6743,7 +6855,7 @@ void HWR_DoPostProcessor(player_t *player)
 
 	// Capture the screen for intermission and screen waving
 	if(gamestate != GS_INTERMISSION)
-		HWD.pfnMakeScreenTexture();
+		HWD.pfnMakeScreenTexture(HWD_SCREENTEXTURE_GENERIC1);
 
 	if (splitscreen) // Not supported in splitscreen - someone want to add support?
 		return;
@@ -6787,7 +6899,7 @@ void HWR_DoPostProcessor(player_t *player)
 
 		// Capture the screen again for screen waving on the intermission
 		if(gamestate != GS_INTERMISSION)
-			HWD.pfnMakeScreenTexture();
+			HWD.pfnMakeScreenTexture(HWD_SCREENTEXTURE_GENERIC1);
 	}
 	// Flipping of the screen isn't done here anymore
 }
@@ -6795,18 +6907,18 @@ void HWR_DoPostProcessor(player_t *player)
 void HWR_StartScreenWipe(void)
 {
 	//CONS_Debug(DBG_RENDER, "In HWR_StartScreenWipe()\n");
-	HWD.pfnStartScreenWipe();
+	HWD.pfnMakeScreenTexture(HWD_SCREENTEXTURE_WIPE_START); // same slot HWR_DoWipe passes as wipeStart
 }
 
 void HWR_EndScreenWipe(void)
 {
 	//CONS_Debug(DBG_RENDER, "In HWR_EndScreenWipe()\n");
-	HWD.pfnEndScreenWipe();
+	HWD.pfnMakeScreenTexture(HWD_SCREENTEXTURE_WIPE_END); // same slot HWR_DoWipe passes as wipeEnd
 }
 
 void HWR_DrawIntermissionBG(void)
 {
-	HWD.pfnDrawIntermissionBG();
+	HWD.pfnDrawScreenTexture(HWD_SCREENTEXTURE_GENERIC1, NULL, 0); // GENERIC1 is the slot HWR_DoPostProcessor captures into; no surface info, no poly flags
 }
 
 //
@@ -6851,201 +6963,40 @@ void HWR_DoWipe(UINT8 wipenum, UINT8 scrnnum)
 		return;
 
 	HWR_GetFadeMask(wipelumpnum);
-	HWD.pfnDoScreenWipe();
-}
+	if (wipestyle == WIPESTYLE_COLORMAP && HWR_UseShader())
+	{
+		FSurfaceInfo surf = {0};
+		FBITFIELD polyflags = PF_Modulated|PF_NoDepthTest;
 
-void HWR_DoTintedWipe(UINT8 wipenum, UINT8 scrnnum)
-{
-	// It does the same thing
-	HWR_DoWipe(wipenum, scrnnum);
+		polyflags |= (wipestyleflags & WSF_TOWHITE) ? PF_Additive : PF_ReverseSubtract;
+		surf.PolyColor.s.red = FADEREDFACTOR;
+		surf.PolyColor.s.green = FADEGREENFACTOR;
+		surf.PolyColor.s.blue = FADEBLUEFACTOR;
+		// polycolor alpha communicates fadein / fadeout to the shader and the backend
+		surf.PolyColor.s.alpha = (wipestyleflags & WSF_FADEIN) ? 255 : 0;
+
+		HWD.pfnSetShader(HWR_GetShaderFromTarget(SHADER_UI_TINTED_WIPE));
+		HWD.pfnDoScreenWipe(HWD_SCREENTEXTURE_WIPE_START, HWD_SCREENTEXTURE_WIPE_END,
+			&surf, polyflags);
+		HWD.pfnUnSetShader();
+	}
+	else
+	{
+		HWD.pfnDoScreenWipe(HWD_SCREENTEXTURE_WIPE_START, HWD_SCREENTEXTURE_WIPE_END,
+			NULL, 0);
+	}
 }
 
 void HWR_MakeScreenFinalTexture(void)
 {
-    HWD.pfnMakeScreenFinalTexture();
+	int tex = HWR_ShouldUsePaletteRendering() ? HWD_SCREENTEXTURE_GENERIC3 : HWD_SCREENTEXTURE_GENERIC2; // GENERIC3 when palette rendering is in use, GENERIC2 otherwise
+	HWD.pfnMakeScreenTexture(tex);
 }
 
 void HWR_DrawScreenFinalTexture(int width, int height)
 {
-    HWD.pfnDrawScreenFinalTexture(width, height);
-}
-
-static inline UINT16 HWR_FindShaderDefs(UINT16 wadnum)
-{
-	UINT16 i;
-	lumpinfo_t *lump_p;
-
-	lump_p = wadfiles[wadnum]->lumpinfo;
-	for (i = 0; i < wadfiles[wadnum]->numlumps; i++, lump_p++)
-		if (memcmp(lump_p->name, "SHADERS", 7) == 0)
-			return i;
-
-	return INT16_MAX;
-}
-
-boolean HWR_CompileShaders(void)
-{
-	return HWD.pfnCompileShaders();
-}
-
-customshaderxlat_t shaderxlat[] =
-{
-	{"Flat", SHADER_FLOOR},
-	{"WallTexture", SHADER_WALL},
-	{"Sprite", SHADER_SPRITE},
-	{"Model", SHADER_MODEL},
-	{"ModelLighting", SHADER_MODEL_LIGHTING},
-	{"WaterRipple", SHADER_WATER},
-	{"Fog", SHADER_FOG},
-	{"Sky", SHADER_SKY},
-	{NULL, 0},
-};
-
-void HWR_LoadAllCustomShaders(void)
-{
-	INT32 i;
-
-	// read every custom shader
-	for (i = 0; i < numwadfiles; i++)
-		HWR_LoadCustomShadersFromFile(i, W_FileHasFolders(wadfiles[i]));
-}
-
-void HWR_LoadCustomShadersFromFile(UINT16 wadnum, boolean PK3)
-{
-	UINT16 lump;
-	char *shaderdef, *line;
-	char *stoken;
-	char *value;
-	size_t size;
-	int linenum = 1;
-	int shadertype = 0;
-	int i;
-
-	lump = HWR_FindShaderDefs(wadnum);
-	if (lump == INT16_MAX)
-		return;
-
-	shaderdef = W_CacheLumpNumPwad(wadnum, lump, PU_CACHE);
-	size = W_LumpLengthPwad(wadnum, lump);
-
-	line = Z_Malloc(size+1, PU_STATIC, NULL);
-	M_Memcpy(line, shaderdef, size);
-	line[size] = '\0';
-
-	stoken = strtok(line, "\r\n ");
-	while (stoken)
-	{
-		if ((stoken[0] == '/' && stoken[1] == '/')
-			|| (stoken[0] == '#'))// skip comments
-		{
-			stoken = strtok(NULL, "\r\n");
-			goto skip_field;
-		}
-
-		if (!stricmp(stoken, "GLSL"))
-		{
-			value = strtok(NULL, "\r\n ");
-			if (!value)
-			{
-				CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: Missing shader type (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
-				stoken = strtok(NULL, "\r\n"); // skip end of line
-				goto skip_lump;
-			}
-
-			if (!stricmp(value, "VERTEX"))
-				shadertype = 1;
-			else if (!stricmp(value, "FRAGMENT"))
-				shadertype = 2;
-
-skip_lump:
-			stoken = strtok(NULL, "\r\n ");
-			linenum++;
-		}
-		else
-		{
-			value = strtok(NULL, "\r\n= ");
-			if (!value)
-			{
-				CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: Missing shader target (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
-				stoken = strtok(NULL, "\r\n"); // skip end of line
-				goto skip_field;
-			}
-
-			if (!shadertype)
-			{
-				CONS_Alert(CONS_ERROR, "HWR_LoadCustomShadersFromFile: Missing shader type (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
-				Z_Free(line);
-				return;
-			}
-
-			for (i = 0; shaderxlat[i].type; i++)
-			{
-				if (!stricmp(shaderxlat[i].type, stoken))
-				{
-					size_t shader_size;
-					char *shader_source;
-					char *shader_lumpname;
-					UINT16 shader_lumpnum;
-
-					if (PK3)
-					{
-						shader_lumpname = Z_Malloc(strlen(value) + 12, PU_STATIC, NULL);
-						strcpy(shader_lumpname, "Shaders/sh_");
-						strcat(shader_lumpname, value);
-						shader_lumpnum = W_CheckNumForFullNamePK3(shader_lumpname, wadnum, 0);
-					}
-					else
-					{
-						shader_lumpname = Z_Malloc(strlen(value) + 4, PU_STATIC, NULL);
-						strcpy(shader_lumpname, "SH_");
-						strcat(shader_lumpname, value);
-						shader_lumpnum = W_CheckNumForNamePwad(shader_lumpname, wadnum, 0);
-					}
-
-					if (shader_lumpnum == INT16_MAX)
-					{
-						CONS_Alert(CONS_ERROR, "HWR_LoadCustomShadersFromFile: Missing shader source %s (file %s, line %d)\n", shader_lumpname, wadfiles[wadnum]->filename, linenum);
-						Z_Free(shader_lumpname);
-						continue;
-					}
-
-					shader_size = W_LumpLengthPwad(wadnum, shader_lumpnum);
-					shader_source = Z_Malloc(shader_size, PU_STATIC, NULL);
-					W_ReadLumpPwad(wadnum, shader_lumpnum, shader_source);
-
-					HWD.pfnLoadCustomShader(shaderxlat[i].id, shader_source, shader_size, (shadertype == 2));
-
-					Z_Free(shader_source);
-					Z_Free(shader_lumpname);
-				}
-			}
-
-skip_field:
-			stoken = strtok(NULL, "\r\n= ");
-			linenum++;
-		}
-	}
-
-	Z_Free(line);
-	return;
-}
-
-const char *HWR_GetShaderName(INT32 shader)
-{
-	INT32 i;
-
-	if (shader)
-	{
-		for (i = 0; shaderxlat[i].type; i++)
-		{
-			if (shaderxlat[i].id == shader)
-				return shaderxlat[i].type;
-		}
-
-		return "Unknown";
-	}
-
-	return "Default";
+	int tex = HWR_ShouldUsePaletteRendering() ? HWD_SCREENTEXTURE_GENERIC3 : HWD_SCREENTEXTURE_GENERIC2;
+	HWD.pfnDrawScreenFinalTexture(tex, width, height);
 }
 
 #endif // HWRENDER
diff --git a/src/hardware/hw_main.h b/src/hardware/hw_main.h
index 0639bcffeb6446989575bfffb44490433f8473c6..2d4c74583a1c47d35ab22b26e119874dab68e190 100644
--- a/src/hardware/hw_main.h
+++ b/src/hardware/hw_main.h
@@ -61,11 +61,11 @@ void HWR_StartScreenWipe(void);
 void HWR_EndScreenWipe(void);
 void HWR_DrawIntermissionBG(void);
 void HWR_DoWipe(UINT8 wipenum, UINT8 scrnnum);
-void HWR_DoTintedWipe(UINT8 wipenum, UINT8 scrnnum);
 void HWR_MakeScreenFinalTexture(void);
 void HWR_DrawScreenFinalTexture(int width, int height);
 
 // This stuff is put here so models can use them
+boolean HWR_UseShader(void);
 void HWR_Lighting(FSurfaceInfo *Surface, INT32 light_level, extracolormap_t *colormap);
 UINT8 HWR_FogBlockAlpha(INT32 light, extracolormap_t *colormap); // Let's see if this can work
 
@@ -74,13 +74,7 @@ FBITFIELD HWR_GetBlendModeFlag(INT32 style);
 FBITFIELD HWR_SurfaceBlend(INT32 style, INT32 transtablenum, FSurfaceInfo *pSurf);
 FBITFIELD HWR_TranstableToAlpha(INT32 transtablenum, FSurfaceInfo *pSurf);
 
-boolean HWR_CompileShaders(void);
-
-void HWR_LoadAllCustomShaders(void);
-void HWR_LoadCustomShadersFromFile(UINT16 wadnum, boolean PK3);
-const char *HWR_GetShaderName(INT32 shader);
-
-extern customshaderxlat_t shaderxlat[];
+boolean HWR_ShouldUsePaletteRendering(void);
 
 extern CV_PossibleValue_t glanisotropicmode_cons_t[];
 
@@ -103,8 +97,9 @@ extern consvar_t cv_glspritebillboarding;
 extern consvar_t cv_glskydome;
 extern consvar_t cv_glfakecontrast;
 extern consvar_t cv_glslopecontrast;
-
 extern consvar_t cv_glbatching;
+extern consvar_t cv_glpaletterendering;
+extern consvar_t cv_glpalettedepth;
 
 extern consvar_t cv_glwireframe;
 
diff --git a/src/hardware/hw_md2.c b/src/hardware/hw_md2.c
index 9797a93312e2df6159f64ef472abe1beb5f2f024..ef0341bd5da7b73b5b9757a06a8eab2df521967f 100644
--- a/src/hardware/hw_md2.c
+++ b/src/hardware/hw_md2.c
@@ -390,8 +390,6 @@ static void md2_loadTexture(md2_t *model)
 	if (!grPatch->mipmap->downloaded && !grPatch->mipmap->data)
 	{
 		int w = 0, h = 0;
-		UINT32 size;
-		RGBA_t *image;
 
 #ifdef HAVE_PNG
 		grPatch->mipmap->format = PNG_Load(filename, &w, &h, grPatch);
@@ -412,13 +410,19 @@ static void md2_loadTexture(md2_t *model)
 		grPatch->mipmap->width = (UINT16)w;
 		grPatch->mipmap->height = (UINT16)h;
 
-		// Lactozilla: Apply colour cube
-		image = grPatch->mipmap->data;
-		size = w*h;
-		while (size--)
+		// for palette rendering, color cube is applied in post-processing instead of here
+		if (!HWR_ShouldUsePaletteRendering())
 		{
-			V_CubeApply(&image->s.red, &image->s.green, &image->s.blue);
-			image++;
+			UINT32 size;
+			RGBA_t *image;
+			// Lactozilla: Apply colour cube
+			image = grPatch->mipmap->data;
+			size = w*h;
+			while (size--)
+			{
+				V_CubeApply(&image->s.red, &image->s.green, &image->s.blue);
+				image++;
+			}
 		}
 	}
 	HWD.pfnSetTexture(grPatch->mipmap);
@@ -1550,7 +1554,8 @@ boolean HWR_DrawModel(gl_vissprite_t *spr)
 		p.flip = atransform.flip;
 		p.mirror = atransform.mirror;
 
-		HWD.pfnSetShader(SHADER_MODEL);	// model shader
+		if (HWR_UseShader())
+			HWD.pfnSetShader(HWR_GetShaderFromTarget(SHADER_MODEL));
 		{
 			float this_scale = FIXED_TO_FLOAT(interp.scale);
 
diff --git a/src/hardware/hw_shaders.c b/src/hardware/hw_shaders.c
new file mode 100644
index 0000000000000000000000000000000000000000..36cbb5db949c7fae0e28c82b6c0fcb6bd1dd3a73
--- /dev/null
+++ b/src/hardware/hw_shaders.c
@@ -0,0 +1,636 @@
+// SONIC ROBO BLAST 2
+//-----------------------------------------------------------------------------
+// Copyright (C) 2021 by Sonic Team Junior.
+//
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+//-----------------------------------------------------------------------------
+/// \file hw_shaders.c
+/// \brief Handles the shaders used by the game.
+
+#ifdef HWRENDER
+
+#include "hw_glob.h"
+#include "hw_drv.h"
+#include "hw_shaders.h"
+#include "../z_zone.h"
+
+// ================
+//  Shader sources
+// ================
+
+static struct { // built-in GLSL source pair (string literal macros from hw_shaders.h) for one shader target
+	const char *vertex;
+	const char *fragment;
+} const gl_shadersources[] = { // HWR_InitShaders copies entry i for shader target i, so the order here must follow the targets
+	// Floor shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_FLOOR_FRAGMENT_SHADER},
+
+	// Wall shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_WALL_FRAGMENT_SHADER},
+
+	// Sprite shader (same sources as the wall shader)
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_WALL_FRAGMENT_SHADER},
+
+	// Model shader (the only entry with its own vertex shader)
+	{GLSL_MODEL_VERTEX_SHADER, GLSL_MODEL_FRAGMENT_SHADER},
+
+	// Water shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_WATER_FRAGMENT_SHADER},
+
+	// Fog shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_FOG_FRAGMENT_SHADER},
+
+	// Sky shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SKY_FRAGMENT_SHADER},
+
+	// Palette postprocess shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_PALETTE_POSTPROCESS_FRAGMENT_SHADER},
+
+	// UI colormap fade shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_UI_COLORMAP_FADE_FRAGMENT_SHADER},
+
+	// UI tinted wipe shader
+	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_UI_TINTED_WIPE_FRAGMENT_SHADER},
+
+	{NULL, NULL}, // end marker; NOTE(review): the loops in this file are bounded by NUMSHADERTARGETS and never look for it
+};
+
+typedef struct // tells which gl_shaders entries belong to one shader target
+{
+	int base_shader; // index of base shader_t
+	int custom_shader; // index of custom shader_t, or -1 when the target has no custom shader
+} shadertarget_t;
+
+typedef struct // source text and compile status of one shader
+{
+	char *vertex; // vertex stage source, NULL when not set
+	char *fragment; // fragment stage source, NULL when not set
+	boolean compiled; // set by HWR_CompileShader from the result of HWD.pfnCompileShader
+} shader_t; // these are in an array and accessed by indices
+
+// the array has NUMSHADERTARGETS entries for base shaders and for custom shaders:
+// the base shader of target t is at index t, its custom shader at index t + NUMSHADERTARGETS.
+// the array could be expanded in the future to fit "dynamic" custom shaders that aren't fixed to shader targets
+static shader_t gl_shaders[NUMSHADERTARGETS*2];
+// per-target record of which gl_shaders entries to use
+static shadertarget_t gl_shadertargets[NUMSHADERTARGETS];
+
+#define WHITESPACE_CHARS " \t" // horizontal whitespace skipped while scanning for #version
+// preamble lines that HWR_PreprocessShader puts in front of a shader, depending on the graphics cvars
+#define MODEL_LIGHTING_DEFINE "#define SRB2_MODEL_LIGHTING"
+#define PALETTE_RENDERING_DEFINE "#define SRB2_PALETTE_RENDERING"
+
+// Initialize shader variables and the backend's shader system. Load and compile the base shaders.
+// Returns false if HWD.pfnInitShaders fails (shaders cannot be used); compile errors of single shaders only go to the console.
+boolean HWR_InitShaders(void)
+{
+	int i;
+
+	if (!HWD.pfnInitShaders())
+		return false;
+
+	for (i = 0; i < NUMSHADERTARGETS; i++)
+	{
+		// take writable copies of the built-in sources (HWR_PreprocessShader temporarily writes into the text it scans)
+		gl_shaders[i].vertex = Z_StrDup(gl_shadersources[i].vertex);
+		gl_shaders[i].fragment = Z_StrDup(gl_shadersources[i].fragment);
+		// set shader target indices to correct values
+		gl_shadertargets[i].base_shader = i;
+		gl_shadertargets[i].custom_shader = -1; // no custom shader loaded yet
+	}
+
+	HWR_CompileShaders();
+
+	return true;
+}
+
+// helper function: strstr but returns an int with the substring position (offset of str2's first occurrence in str1)
+// returns INT32_MAX if not found, so that "not found" compares greater than any real position
+static INT32 strstr_int(const char *str1, const char *str2)
+{
+	char *location = strstr(str1, str2);
+	if (location)
+		return location - str1;
+	else
+		return INT32_MAX;
+}
+
+// Creates a preprocessed copy of the shader according to the current graphics settings: the enabled SRB2_* defines and a #line directive are put in front, and an existing #version is moved to the very top.
+// Returns a newly Z_Malloc'd string on success and NULL (after a console error) if the #version directive is malformed or a block comment is unclosed.
+// The caller owns the returned string. 'original' must be writable: one byte of it is temporarily overwritten and restored while scanning.
+static char *HWR_PreprocessShader(char *original)
+{
+	const char *line_ending = "\n";
+	int line_ending_len;
+	char *read_pos = original;
+	int original_len = strlen(original);
+	int distance_to_end = original_len; // bytes left between read_pos and the terminator, kept in sync by ADVANCE
+	int new_len;
+	char *new_shader;
+	char *write_pos;
+	char shader_glsl_version[3]; // the three version digits, not NUL-terminated; only set when version_pos != -1
+	int version_pos = -1; // offset of "#version" in original, -1 if there is none
+	int version_len = 0; // length of "#version <digits>" in original
+
+	if (strstr(original, "\r\n"))
+	{
+		line_ending = "\r\n";
+		// check if all line endings are same
+		while ((read_pos = strchr(read_pos, '\n')))
+		{
+			// a '\n' at the very start of the source has no preceding byte, so never read before 'original'
+			if (read_pos == original || read_pos[-1] != '\r')
+			{
+				// this file contains mixed CRLF and LF line endings.
+				// treating it as a LF file during parsing should keep
+				// the results sane enough as long as the gpu driver is fine
+				// with these kinds of weirdly formatted shader sources.
+				line_ending = "\n";
+				break;
+			}
+			read_pos++;
+		}
+		read_pos = original;
+	}
+
+	line_ending_len = strlen(line_ending);
+
+	// Find the #version directive, if it exists. Also don't get fooled if it's
+	// inside a comment. Copy the version digits so they can be used in the preamble.
+	// Time for some string parsing :D
+
+#define STARTSWITH(str, with_what) !strncmp(str, with_what, sizeof(with_what)-1)
+#define ADVANCE(amount) read_pos += (amount); distance_to_end -= (amount);
+	while (true)
+	{
+		// we're at the start of a line or at the end of a block comment.
+		// first get any possible whitespace out of the way
+		int whitespace_len = strspn(read_pos, WHITESPACE_CHARS);
+		if (whitespace_len == distance_to_end)
+			break; // we got to the end
+		ADVANCE(whitespace_len)
+
+		if (STARTSWITH(read_pos, "#version"))
+		{
+			// found a version directive (and it's not inside a comment)
+			// now locate, verify and read the version number
+			int version_number_len;
+			version_pos = read_pos - original;
+			ADVANCE(sizeof("#version") - 1)
+			whitespace_len = strspn(read_pos, WHITESPACE_CHARS);
+			if (!whitespace_len)
+			{
+				CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Syntax error in #version. Expected space after #version, but got other text.\n");
+				return NULL;
+			}
+			else if (whitespace_len == distance_to_end)
+			{
+				CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Syntax error in #version. Expected version number, but got end of file.\n");
+				return NULL;
+			}
+			ADVANCE(whitespace_len)
+			version_number_len = strspn(read_pos, "0123456789");
+			if (!version_number_len)
+			{
+				CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Syntax error in #version. Expected version number, but got other text.\n");
+				return NULL;
+			}
+			else if (version_number_len != 3)
+			{
+				CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Syntax error in #version. Expected version with 3 digits, but got %d digits.\n", version_number_len);
+				return NULL;
+			}
+			M_Memcpy(shader_glsl_version, read_pos, 3);
+			ADVANCE(version_number_len)
+			version_len = (read_pos - original) - version_pos;
+			whitespace_len = strspn(read_pos, WHITESPACE_CHARS);
+			ADVANCE(whitespace_len)
+			if (STARTSWITH(read_pos, "es"))
+			{
+				CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Support for ES shaders is not implemented.\n");
+				return NULL;
+			}
+			break;
+		}
+		else
+		{
+			// go to next newline or end of next block comment if it starts before the newline
+			// and is not inside a line comment
+			INT32 newline_pos = strstr_int(read_pos, line_ending);
+			INT32 line_comment_pos;
+			INT32 block_comment_pos;
+			// optimization: temporarily put a null at the line ending, so strstr does not needlessly
+			// look past it since we're only interested in the current line
+			if (newline_pos != INT32_MAX)
+				read_pos[newline_pos] = '\0';
+			line_comment_pos = strstr_int(read_pos, "//");
+			block_comment_pos = strstr_int(read_pos, "/*");
+			// restore the line ending, remove the null we just put there
+			if (newline_pos != INT32_MAX)
+				read_pos[newline_pos] = line_ending[0];
+			if (line_comment_pos < block_comment_pos)
+			{
+				// line comment found, skip rest of the line
+				if (newline_pos != INT32_MAX)
+				{
+					ADVANCE(newline_pos + line_ending_len)
+				}
+				else
+				{
+					// we got to the end
+					break;
+				}
+			}
+			else if (block_comment_pos < line_comment_pos)
+			{
+				// block comment found, skip past it
+				INT32 block_comment_end;
+				ADVANCE(block_comment_pos + 2)
+				block_comment_end = strstr_int(read_pos, "*/");
+				if (block_comment_end == INT32_MAX)
+				{
+					// could also stop looking for #version here and let the GLSL compiler
+					// output an error message for this broken comment
+					CONS_Alert(CONS_ERROR, "HWR_PreprocessShader: Encountered unclosed block comment in shader.\n");
+					return NULL;
+				}
+				ADVANCE(block_comment_end + 2)
+			}
+			else if (newline_pos == INT32_MAX)
+			{
+				// we got to the end
+				break;
+			}
+			else
+			{
+				// nothing special on this line, move to the next one
+				ADVANCE(newline_pos + line_ending_len)
+			}
+		}
+	}
+#undef STARTSWITH
+#undef ADVANCE
+
+#define ADD_TO_LEN(def) new_len += sizeof(def) - 1 + line_ending_len;
+
+	// Calculate length of modified shader.
+	new_len = original_len;
+	if (cv_glmodellighting.value)
+		ADD_TO_LEN(MODEL_LIGHTING_DEFINE)
+	if (cv_glpaletterendering.value)
+		ADD_TO_LEN(PALETTE_RENDERING_DEFINE)
+
+#undef ADD_TO_LEN
+
+#define VERSION_PART "#version "
+
+	if (new_len != original_len) // a preamble is only written when at least one define is enabled
+	{
+		if (version_pos != -1)
+			new_len += sizeof(VERSION_PART) - 1 + 3 + line_ending_len;
+		new_len += sizeof("#line 0") - 1 + line_ending_len;
+	}
+
+	// Allocate memory for modified shader.
+	new_shader = Z_Malloc(new_len + 1, PU_STATIC, NULL);
+
+	read_pos = original;
+	write_pos = new_shader;
+
+	if (new_len != original_len && version_pos != -1)
+	{
+		strcpy(write_pos, VERSION_PART);
+		write_pos += sizeof(VERSION_PART) - 1;
+		M_Memcpy(write_pos, shader_glsl_version, 3);
+		write_pos += 3;
+		strcpy(write_pos, line_ending);
+		write_pos += line_ending_len;
+	}
+
+#undef VERSION_PART
+
+#define WRITE_DEFINE(define) \
+	{ \
+		strcpy(write_pos, define); \
+		write_pos += sizeof(define) - 1; \
+		strcpy(write_pos, line_ending); \
+		write_pos += line_ending_len; \
+	}
+
+	// Write the defines.
+	if (cv_glmodellighting.value)
+		WRITE_DEFINE(MODEL_LIGHTING_DEFINE)
+	if (cv_glpaletterendering.value)
+		WRITE_DEFINE(PALETTE_RENDERING_DEFINE)
+
+#undef WRITE_DEFINE
+
+	// Write a #line directive, so compiler errors will report line numbers from the
+	// original shader without our preamble lines.
+	if (new_len != original_len)
+	{
+		// line numbering in the #line directive is different for versions 110-150
+		if (version_pos == -1 || shader_glsl_version[0] == '1')
+			strcpy(write_pos, "#line 0");
+		else
+			strcpy(write_pos, "#line 1");
+		write_pos += sizeof("#line 0") - 1; // both directive strings have the same length
+		strcpy(write_pos, line_ending);
+		write_pos += line_ending_len;
+	}
+
+	// Copy the original shader.
+	M_Memcpy(write_pos, read_pos, original_len);
+
+	// Erase the original #version directive, if it exists and was copied.
+	if (new_len != original_len && version_pos != -1)
+		memset(write_pos + version_pos, ' ', version_len);
+
+	// Terminate the new string.
+	new_shader[new_len] = '\0';
+
+	return new_shader;
+}
+
+// preprocess and compile shader at gl_shaders[index]; .compiled ends up true only if preprocessing and HWD.pfnCompileShader both succeed
+static void HWR_CompileShader(int index)
+{
+	char *vertex_source = gl_shaders[index].vertex;
+	char *fragment_source = gl_shaders[index].fragment;
+	gl_shaders[index].compiled = false; // don't let a "true" left over from an earlier source survive the early returns below
+	if (vertex_source)
+	{
+		char *preprocessed = HWR_PreprocessShader(vertex_source);
+		if (!preprocessed) return; // error was already printed by HWR_PreprocessShader
+		HWD.pfnLoadShader(index, preprocessed, HWD_SHADERSTAGE_VERTEX); // NOTE(review): preprocessed is not freed here, presumably the backend takes ownership; confirm
+	}
+	if (fragment_source)
+	{
+		char *preprocessed = HWR_PreprocessShader(fragment_source);
+		if (!preprocessed) return; // error was already printed by HWR_PreprocessShader
+		HWD.pfnLoadShader(index, preprocessed, HWD_SHADERSTAGE_FRAGMENT);
+	}
+
+	gl_shaders[index].compiled = HWD.pfnCompileShader(index);
+}
+
+// compile or recompile shaders: every base shader, plus the custom shader of each target that has one; failures are reported to the console
+void HWR_CompileShaders(void)
+{
+	int i;
+
+	for (i = 0; i < NUMSHADERTARGETS; i++)
+	{
+		int custom_index = gl_shadertargets[i].custom_shader;
+		HWR_CompileShader(i);
+		if (!gl_shaders[i].compiled) // NOTE(review): shaderxlat is indexed by target here, which assumes its rows are ordered by id; confirm
+			CONS_Alert(CONS_ERROR, "HWR_CompileShaders: Compilation failed for base %s shader!\n", shaderxlat[i].type);
+		if (custom_index != -1) // -1 means the target has no custom shader
+		{
+			HWR_CompileShader(custom_index);
+			if (!gl_shaders[custom_index].compiled)
+				CONS_Alert(CONS_ERROR, "HWR_CompileShaders: Recompilation failed for the custom %s shader! See the console messages above for more information.\n", shaderxlat[i].type);
+		}
+	}
+}
+
+int HWR_GetShaderFromTarget(int shader_target) // returns the gl_shaders index to use for a target; shader_target is not range-checked
+{
+	int custom_shader = gl_shadertargets[shader_target].custom_shader;
+	// use custom shader if following are true
+	// - custom shader exists
+	// - custom shader has been compiled successfully
+	// - custom shaders are enabled
+	// - custom shaders are allowed by the server
+	if (custom_shader != -1 && gl_shaders[custom_shader].compiled &&
+		cv_glshaders.value == 1 && cv_glallowshaders.value)
+		return custom_shader;
+	else
+		return gl_shadertargets[shader_target].base_shader; // otherwise fall back to the built-in shader
+}
+
+static inline UINT16 HWR_FindShaderDefs(UINT16 wadnum) // returns the index of the first lump whose name starts with "SHADERS"
+{
+	UINT16 i;
+	lumpinfo_t *lump_p;
+
+	lump_p = wadfiles[wadnum]->lumpinfo;
+	for (i = 0; i < wadfiles[wadnum]->numlumps; i++, lump_p++)
+		if (memcmp(lump_p->name, "SHADERS", 7) == 0) // only the first 7 bytes are compared, so this is a prefix match
+			return i;
+
+	return INT16_MAX; // "not found" value that the caller checks for
+}
+
+customshaderxlat_t shaderxlat[] = // name <-> shader target id table; the names are matched case-insensitively against SHADERS lump entries
+{
+	{"Flat", SHADER_FLOOR},
+	{"WallTexture", SHADER_WALL},
+	{"Sprite", SHADER_SPRITE},
+	{"Model", SHADER_MODEL},
+	{"WaterRipple", SHADER_WATER},
+	{"Fog", SHADER_FOG},
+	{"Sky", SHADER_SKY},
+	{"PalettePostprocess", SHADER_PALETTE_POSTPROCESS},
+	{"UIColormapFade", SHADER_UI_COLORMAP_FADE},
+	{"UITintedWipe", SHADER_UI_TINTED_WIPE},
+	{NULL, 0}, // terminator: lookups stop at the first NULL type
+};
+
+void HWR_LoadAllCustomShaders(void) // loads the custom shaders of every currently loaded file, in load order
+{
+	INT32 i;
+
+	// read every custom shader
+	for (i = 0; i < numwadfiles; i++)
+		HWR_LoadCustomShadersFromFile(i, W_FileHasFolders(wadfiles[i])); // files with folders use the PK3 lump naming
+}
+
+void HWR_LoadCustomShadersFromFile(UINT16 wadnum, boolean PK3) // parses the SHADERS lump of one file, then loads and compiles the custom shaders it lists
+{
+	UINT16 lump;
+	char *shaderdef, *line;
+	char *stoken;
+	char *value;
+	size_t size;
+	int linenum = 1; // used in messages only; advanced once per parsed entry
+	int shadertype = 0; // 0 = none seen yet, 1 = vertex, 2 = fragment (set by "GLSL <type>" entries)
+	int i;
+	boolean modified_shaders[NUMSHADERTARGETS] = {0}; // targets that received a source from this file
+
+	if (!gl_shadersavailable)
+		return;
+
+	lump = HWR_FindShaderDefs(wadnum);
+	if (lump == INT16_MAX) // this file has no SHADERS lump
+		return;
+
+	shaderdef = W_CacheLumpNumPwad(wadnum, lump, PU_CACHE);
+	size = W_LumpLengthPwad(wadnum, lump);
+
+	line = Z_Malloc(size+1, PU_STATIC, NULL); // NUL-terminated private copy, because strtok writes into its input
+	M_Memcpy(line, shaderdef, size);
+	line[size] = '\0';
+
+	stoken = strtok(line, "\r\n ");
+	while (stoken)
+	{
+		if ((stoken[0] == '/' && stoken[1] == '/')
+			|| (stoken[0] == '#'))// skip comments
+		{
+			stoken = strtok(NULL, "\r\n"); // NOTE(review): if the comment token itself ended at the line break, this consumes the following line instead; confirm
+			goto skip_field;
+		}
+
+		if (!stricmp(stoken, "GLSL"))
+		{
+			value = strtok(NULL, "\r\n ");
+			if (!value)
+			{
+				CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: Missing shader type (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
+				stoken = strtok(NULL, "\r\n"); // skip end of line
+				goto skip_lump;
+			}
+
+			if (!stricmp(value, "VERTEX"))
+				shadertype = 1;
+			else if (!stricmp(value, "FRAGMENT"))
+				shadertype = 2; // any other value leaves shadertype unchanged
+
+skip_lump:
+			stoken = strtok(NULL, "\r\n ");
+			linenum++;
+		}
+		else
+		{
+			value = strtok(NULL, "\r\n= "); // stoken is the shader target name, value the shader lump name suffix
+			if (!value)
+			{
+				CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: Missing shader target (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
+				stoken = strtok(NULL, "\r\n"); // skip end of line
+				goto skip_field;
+			}
+
+			if (!shadertype) // a target entry appeared before any "GLSL <type>" entry
+			{
+				CONS_Alert(CONS_ERROR, "HWR_LoadCustomShadersFromFile: Missing shader type (file %s, line %d)\n", wadfiles[wadnum]->filename, linenum);
+				Z_Free(line);
+				return;
+			}
+
+			for (i = 0; shaderxlat[i].type; i++) // a target name that matches no row is silently ignored
+			{
+				if (!stricmp(shaderxlat[i].type, stoken))
+				{
+					size_t shader_string_length;
+					char *shader_source;
+					char *shader_lumpname;
+					UINT16 shader_lumpnum;
+					int shader_index; // index in gl_shaders
+
+					if (PK3)
+					{
+						shader_lumpname = Z_Malloc(strlen(value) + 12, PU_STATIC, NULL); // 11 prefix characters + terminator
+						strcpy(shader_lumpname, "Shaders/sh_");
+						strcat(shader_lumpname, value);
+						shader_lumpnum = W_CheckNumForFullNamePK3(shader_lumpname, wadnum, 0);
+					}
+					else
+					{
+						shader_lumpname = Z_Malloc(strlen(value) + 4, PU_STATIC, NULL); // 3 prefix characters + terminator
+						strcpy(shader_lumpname, "SH_");
+						strcat(shader_lumpname, value);
+						shader_lumpnum = W_CheckNumForNamePwad(shader_lumpname, wadnum, 0);
+					}
+
+					if (shader_lumpnum == INT16_MAX)
+					{
+						CONS_Alert(CONS_ERROR, "HWR_LoadCustomShadersFromFile: Missing shader source %s (file %s, line %d)\n", shader_lumpname, wadfiles[wadnum]->filename, linenum);
+						Z_Free(shader_lumpname);
+						continue;
+					}
+
+					shader_string_length = W_LumpLengthPwad(wadnum, shader_lumpnum) + 1; // + 1 for the terminator added below
+					shader_source = Z_Malloc(shader_string_length, PU_STATIC, NULL);
+					W_ReadLumpPwad(wadnum, shader_lumpnum, shader_source);
+					shader_source[shader_string_length-1] = '\0';
+
+					shader_index = shaderxlat[i].id + NUMSHADERTARGETS; // custom shaders live in the second half of gl_shaders
+					if (!modified_shaders[shaderxlat[i].id])
+					{
+						// this will clear any old custom shaders from previously loaded files
+						// Z_Free checks if the pointer is NULL!
+						Z_Free(gl_shaders[shader_index].vertex);
+						gl_shaders[shader_index].vertex = NULL;
+						Z_Free(gl_shaders[shader_index].fragment);
+						gl_shaders[shader_index].fragment = NULL;
+					}
+					modified_shaders[shaderxlat[i].id] = true;
+
+					if (shadertype == 1)
+					{
+						if (gl_shaders[shader_index].vertex) // can only be non-NULL here if this same file already set it
+						{
+							CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: %s is overwriting another %s vertex shader from the same addon! (file %s, line %d)\n", shader_lumpname, shaderxlat[i].type, wadfiles[wadnum]->filename, linenum);
+							Z_Free(gl_shaders[shader_index].vertex);
+						}
+						gl_shaders[shader_index].vertex = shader_source; // gl_shaders now owns shader_source
+					}
+					else
+					{
+						if (gl_shaders[shader_index].fragment) // can only be non-NULL here if this same file already set it
+						{
+							CONS_Alert(CONS_WARNING, "HWR_LoadCustomShadersFromFile: %s is overwriting another %s fragment shader from the same addon! (file %s, line %d)\n", shader_lumpname, shaderxlat[i].type, wadfiles[wadnum]->filename, linenum);
+							Z_Free(gl_shaders[shader_index].fragment);
+						}
+						gl_shaders[shader_index].fragment = shader_source; // gl_shaders now owns shader_source
+					}
+
+					Z_Free(shader_lumpname);
+				}
+			}
+
+skip_field:
+			stoken = strtok(NULL, "\r\n= ");
+			linenum++;
+		}
+	}
+
+	for (i = 0; i < NUMSHADERTARGETS; i++) // activate and compile every target this file touched
+	{
+		if (modified_shaders[i])
+		{
+			int shader_index = i + NUMSHADERTARGETS; // index to gl_shaders
+			gl_shadertargets[i].custom_shader = shader_index;
+			// if only one stage (vertex/fragment) is defined, the other one
+			// is copied from the base shaders.
+			if (!gl_shaders[shader_index].fragment)
+				gl_shaders[shader_index].fragment = Z_StrDup(gl_shadersources[i].fragment);
+			if (!gl_shaders[shader_index].vertex)
+				gl_shaders[shader_index].vertex = Z_StrDup(gl_shadersources[i].vertex);
+			HWR_CompileShader(shader_index);
+			if (!gl_shaders[shader_index].compiled)
+				CONS_Alert(CONS_ERROR, "HWR_LoadCustomShadersFromFile: A compilation error occured for the %s shader in file %s. See the console messages above for more information.\n", shaderxlat[i].type, wadfiles[wadnum]->filename);
+		}
+	}
+
+	Z_Free(line);
+	return;
+}
+
+const char *HWR_GetShaderName(INT32 shader) // returns the shaderxlat name for a shader target id
+{
+	INT32 i;
+
+	for (i = 0; shaderxlat[i].type; i++) // stops at the {NULL, 0} terminator row
+	{
+		if (shaderxlat[i].id == shader)
+			return shaderxlat[i].type;
+	}
+
+	return "Unknown"; // no row has this id
+}
+
+#endif // HWRENDER
diff --git a/src/hardware/hw_shaders.h b/src/hardware/hw_shaders.h
new file mode 100644
index 0000000000000000000000000000000000000000..09395dbd0257d620b67d2ee9a57159f0fbca4e3a
--- /dev/null
+++ b/src/hardware/hw_shaders.h
@@ -0,0 +1,424 @@
+// SONIC ROBO BLAST 2
+//-----------------------------------------------------------------------------
+// Copyright (C) 2021 by Sonic Team Junior.
+//
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+//-----------------------------------------------------------------------------
+/// \file hw_shaders.h
+/// \brief Handles the shaders used by the game.
+
+#ifndef _HW_SHADERS_H_
+#define _HW_SHADERS_H_
+
+#include "../doomtype.h"
+
+// ================
+//  Vertex shaders
+// ================
+
+//
+// Generic vertex shader: transforms the vertex with the fixed-function matrices and
+// passes the vertex color and texture coordinate 0 through unchanged.
+// (the "\0" at the end only adds a second terminator after the literal's implicit one)
+#define GLSL_DEFAULT_VERTEX_SHADER \
+	"void main()\n" \
+	"{\n" \
+		"gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;\n" \
+		"gl_FrontColor = gl_Color;\n" \
+		"gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" \
+		"gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;\n" \
+	"}\0"
+
+// Model vertex shader. With SRB2_MODEL_LIGHTING defined it replicates the way fixed function
+// lighting is used by the model lighting option and stores the lighting result to gl_FrontColor
+// (ambient lighting of 0.75 and diffuse lighting from above); otherwise gl_Color is passed through.
+#define GLSL_MODEL_VERTEX_SHADER \
+	"void main()\n" \
+	"{\n" \
+		"#ifdef SRB2_MODEL_LIGHTING\n" \
+		"float nDotVP = dot(gl_Normal, vec3(0, 1, 0));\n" \
+		"float light = min(0.75 + max(nDotVP, 0.0), 1.0);\n" \
+		"gl_FrontColor = vec4(light, light, light, 1.0);\n" \
+		"#else\n" \
+		"gl_FrontColor = gl_Color;\n" \
+		"#endif\n" \
+		"gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;\n" \
+		"gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" \
+		"gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;\n" \
+	"}\0"
+
+// ==================
+//  Fragment shaders
+// ==================
+
+//
+// Generic fragment shader: samples tex and modulates the result with poly_color
+//
+
+#define GLSL_DEFAULT_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"void main(void) {\n" \
+		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * poly_color;\n" \
+	"}\0"
+
+//
+// Software fragment shader
+//
+// GLSL function R_DoomColormap(light, z). It uses STARTMAP_FUDGE and SCALE_FUDGE, so:
+// Include GLSL_FLOOR_FUDGES or GLSL_WALL_FUDGES or define the fudges in shaders that use this macro.
+#define GLSL_DOOM_COLORMAP \
+	"float R_DoomColormap(float light, float z)\n" \
+	"{\n" \
+		"float lightnum = clamp(light / 17.0, 0.0, 15.0);\n" \
+		"float lightz = clamp(z / 16.0, 0.0, 127.0);\n" \
+		"float startmap = (15.0 - lightnum) * 4.0;\n" \
+		"float scale = 160.0 / (lightz + 1.0);\n" \
+		"float cap = (155.0 - light) * 0.26;\n" \
+		"return max(startmap * STARTMAP_FUDGE - scale * 0.5 * SCALE_FUDGE, cap);\n" \
+	"}\n"
+// lighting cap adjustment (the "cap" line above):
+// first num (155.0), increase to make it start to go dark sooner
+// second num (0.26), increase to make it go dark faster
+
+#define GLSL_DOOM_LIGHT_EQUATION /* GLSL function; calls R_DoomColormap, so GLSL_DOOM_COLORMAP must come first */ \
+	"float R_DoomLightingEquation(float light)\n" \
+	"{\n" \
+		"float z = gl_FragCoord.z / gl_FragCoord.w;\n" \
+		"float colormap = floor(R_DoomColormap(light, z)) + 0.5;\n" \
+		"return clamp(colormap, 0.0, 31.0) / 32.0;\n" \
+	"}\n"
+
+#define GLSL_SOFTWARE_TINT_EQUATION /* statements for main(): reads base_color and tint_color, writes final_color.rgb */ \
+	"if (tint_color.a > 0.0) {\n" \
+		"float color_bright = sqrt((base_color.r * base_color.r) + (base_color.g * base_color.g) + (base_color.b * base_color.b));\n" \
+		"float strength = sqrt(9.0 * tint_color.a);\n" \
+		"final_color.r = clamp((color_bright * (tint_color.r * strength)) + (base_color.r * (1.0 - strength)), 0.0, 1.0);\n" \
+		"final_color.g = clamp((color_bright * (tint_color.g * strength)) + (base_color.g * (1.0 - strength)), 0.0, 1.0);\n" \
+		"final_color.b = clamp((color_bright * (tint_color.b * strength)) + (base_color.b * (1.0 - strength)), 0.0, 1.0);\n" \
+	"}\n"
+
+#define GLSL_SOFTWARE_FADE_EQUATION /* statements for main(): declares darkness; reads lighting, fade_start, fade_end, fade_color; mixes final_color towards fade_color */ \
+	"float darkness = R_DoomLightingEquation(lighting);\n" \
+	"if (fade_start != 0.0 || fade_end != 31.0) {\n" \
+		"float fs = fade_start / 31.0;\n" \
+		"float fe = fade_end / 31.0;\n" \
+		"float fd = fe - fs;\n" \
+		"darkness = clamp((darkness - fs) * (1.0 / fd), 0.0, 1.0);\n" \
+	"}\n" \
+	"final_color = mix(final_color, fade_color, darkness);\n"
+
+#define GLSL_PALETTE_RENDERING /* statements for main(): reads texel, lighting, poly_color and both lookup samplers; declares final_color and writes gl_FragColor */ \
+	"float tex_pal_idx = texture3D(palette_lookup_tex, vec3((texel * 63.0 + 0.5) / 64.0))[0] * 255.0;\n" \
+	"float z = gl_FragCoord.z / gl_FragCoord.w;\n" \
+	"float light_y = clamp(floor(R_DoomColormap(lighting, z)), 0.0, 31.0);\n" \
+	"vec2 lighttable_coord = vec2((tex_pal_idx + 0.5) / 256.0, (light_y + 0.5) / 32.0);\n" \
+	"vec4 final_color = texture2D(lighttable_tex, lighttable_coord);\n" \
+	"final_color.a = texel.a * poly_color.a;\n" \
+	"gl_FragColor = final_color;\n"
+
+#define GLSL_SOFTWARE_FRAGMENT_SHADER /* two variants selected by SRB2_PALETTE_RENDERING; the fudge defines must be prepended by the user of this macro */ \
+	"#ifdef SRB2_PALETTE_RENDERING\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform sampler3D palette_lookup_tex;\n" \
+	"uniform sampler2D lighttable_tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform float lighting;\n" \
+	GLSL_DOOM_COLORMAP \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		GLSL_PALETTE_RENDERING \
+	"}\n" \
+	"#else\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform vec4 tint_color;\n" \
+	"uniform vec4 fade_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float fade_start;\n" \
+	"uniform float fade_end;\n" \
+	GLSL_DOOM_COLORMAP \
+	GLSL_DOOM_LIGHT_EQUATION \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"vec4 base_color = texel * poly_color;\n" \
+		"vec4 final_color = base_color;\n" \
+		GLSL_SOFTWARE_TINT_EQUATION \
+		GLSL_SOFTWARE_FADE_EQUATION \
+		"final_color.a = texel.a * poly_color.a;\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\n" \
+	"#endif\0"
+
+// hand tuned adjustments for light level calculation (the factors used by R_DoomColormap in GLSL_DOOM_COLORMAP)
+#define GLSL_FLOOR_FUDGES \
+	"#define STARTMAP_FUDGE 1.06\n" \
+	"#define SCALE_FUDGE 1.15\n"
+
+#define GLSL_WALL_FUDGES /* wall counterpart of GLSL_FLOOR_FUDGES */ \
+	"#define STARTMAP_FUDGE 1.05\n" \
+	"#define SCALE_FUDGE 2.2\n"
+
+#define GLSL_FLOOR_FRAGMENT_SHADER /* floor fudges followed by the shared software fragment shader */ \
+	GLSL_FLOOR_FUDGES \
+	GLSL_SOFTWARE_FRAGMENT_SHADER
+
+#define GLSL_WALL_FRAGMENT_SHADER /* wall fudges followed by the shared software fragment shader */ \
+	GLSL_WALL_FUDGES \
+	GLSL_SOFTWARE_FRAGMENT_SHADER
+
+// same as the wall fragment shader but multiplies results with the lighting value from the
+// accompanying vertex shader (stored in gl_Color) if model lighting is enabled
+#define GLSL_MODEL_FRAGMENT_SHADER \
+	GLSL_WALL_FUDGES \
+	"#ifdef SRB2_PALETTE_RENDERING\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform sampler3D palette_lookup_tex;\n" \
+	"uniform sampler2D lighttable_tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform float lighting;\n" \
+	GLSL_DOOM_COLORMAP \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"#ifdef SRB2_MODEL_LIGHTING\n" \
+		"texel *= gl_Color;\n" \
+		"#endif\n" \
+		GLSL_PALETTE_RENDERING \
+	"}\n" \
+	"#else\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform vec4 tint_color;\n" \
+	"uniform vec4 fade_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float fade_start;\n" \
+	"uniform float fade_end;\n" \
+	GLSL_DOOM_COLORMAP \
+	GLSL_DOOM_LIGHT_EQUATION \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"vec4 base_color = texel * poly_color;\n" \
+		"vec4 final_color = base_color;\n" \
+		GLSL_SOFTWARE_TINT_EQUATION \
+		GLSL_SOFTWARE_FADE_EQUATION \
+		"#ifdef SRB2_MODEL_LIGHTING\n" \
+		"final_color *= gl_Color;\n" \
+		"#endif\n" \
+		"final_color.a = texel.a * poly_color.a;\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\n" \
+	"#endif\0"
+
+//
+// Water surface shader
+//
+// Mostly guesstimated, unlike the other shaders, which are built off Software science.
+// Still needs to distort things underneath/around the water...
+//
+// GLSL_WATER_TEXEL: statements for main() that declare texel, sampled at a sin/cos-distorted coordinate; needs freq, amp, speed, pi, leveltime and tex
+#define GLSL_WATER_TEXEL \
+	"float water_z = (gl_FragCoord.z / gl_FragCoord.w) / 2.0;\n" \
+	"float a = -pi * (water_z * freq) + (leveltime * speed);\n" \
+	"float sdistort = sin(a) * amp;\n" \
+	"float cdistort = cos(a) * amp;\n" \
+	"vec4 texel = texture2D(tex, vec2(gl_TexCoord[0].s - sdistort, gl_TexCoord[0].t - cdistort));\n"
+
+#define GLSL_WATER_FRAGMENT_SHADER /* software fragment shader with floor fudges, but the texel comes from GLSL_WATER_TEXEL */ \
+	GLSL_FLOOR_FUDGES \
+	"const float freq = 0.025;\n" \
+	"const float amp = 0.025;\n" \
+	"const float speed = 2.0;\n" \
+	"const float pi = 3.14159;\n" \
+	"#ifdef SRB2_PALETTE_RENDERING\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform sampler3D palette_lookup_tex;\n" \
+	"uniform sampler2D lighttable_tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float leveltime;\n" \
+	GLSL_DOOM_COLORMAP \
+	"void main(void) {\n" \
+		GLSL_WATER_TEXEL \
+		GLSL_PALETTE_RENDERING \
+	"}\n" \
+	"#else\n" \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform vec4 tint_color;\n" \
+	"uniform vec4 fade_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float fade_start;\n" \
+	"uniform float fade_end;\n" \
+	"uniform float leveltime;\n" \
+	GLSL_DOOM_COLORMAP \
+	GLSL_DOOM_LIGHT_EQUATION \
+	"void main(void) {\n" \
+		GLSL_WATER_TEXEL \
+		"vec4 base_color = texel * poly_color;\n" \
+		"vec4 final_color = base_color;\n" \
+		GLSL_SOFTWARE_TINT_EQUATION \
+		GLSL_SOFTWARE_FADE_EQUATION \
+		"final_color.a = texel.a * poly_color.a;\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\n" \
+	"#endif\0"
+
+//
+// Fog block shader
+//
+// Alpha of the planes themselves is still slightly off -- see HWR_FogBlockAlpha
+// No texture is sampled; the base color is the interpolated vertex color (gl_Color).
+
+// The floor fudges are used, but should the wall fudges be used instead? or something in between?
+// or separate values for floors and walls? (need to change more than this shader for that)
+#define GLSL_FOG_FRAGMENT_SHADER \
+	GLSL_FLOOR_FUDGES \
+	"uniform vec4 tint_color;\n" \
+	"uniform vec4 fade_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float fade_start;\n" \
+	"uniform float fade_end;\n" \
+	GLSL_DOOM_COLORMAP \
+	GLSL_DOOM_LIGHT_EQUATION \
+	"void main(void) {\n" \
+		"vec4 base_color = gl_Color;\n" \
+		"vec4 final_color = base_color;\n" \
+		GLSL_SOFTWARE_TINT_EQUATION \
+		GLSL_SOFTWARE_FADE_EQUATION \
+		"gl_FragColor = final_color;\n" \
+	"}\0"
+
+//
+// Sky fragment shader
+// Multiplies the texture sample with gl_Color and poly_color
+//
+#define GLSL_SKY_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"void main(void) {\n" \
+		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * gl_Color * poly_color;\n" \
+	"}\0"
+
+// Shader for the palette rendering postprocess step: maps each texel of tex through palette_lookup_tex to a palette index and outputs that index's color from palette_tex
+#define GLSL_PALETTE_POSTPROCESS_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform sampler3D palette_lookup_tex;\n" \
+	"uniform sampler1D palette_tex;\n" \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"float tex_pal_idx = texture3D(palette_lookup_tex, vec3((texel * 63.0 + 0.5) / 64.0))[0] * 255.0;\n" \
+		"float palette_coord = (tex_pal_idx + 0.5) / 256.0;\n" \
+		"vec4 final_color = texture1D(palette_tex, palette_coord);\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\0"
+
+// Applies a palettized colormap fade to tex: finds the texel's palette index via palette_lookup_tex, then samples lighttable_tex with that index as the horizontal coordinate and "lighting" as the vertical one (presumably a 256x32 light table -- confirm).
+#define GLSL_UI_COLORMAP_FADE_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform float lighting;\n" \
+	"uniform sampler3D palette_lookup_tex;\n" \
+	"uniform sampler2D lighttable_tex;\n" \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"float tex_pal_idx = texture3D(palette_lookup_tex, vec3((texel * 63.0 + 0.5) / 64.0))[0] * 255.0;\n" \
+		"vec2 lighttable_coord = vec2((tex_pal_idx + 0.5) / 256.0, (lighting + 0.5) / 32.0);\n" \
+		"gl_FragColor = texture2D(lighttable_tex, lighttable_coord);\n" \
+	"}\0"
+
+// For wipes that use additive and subtractive blending; the texel alpha is inverted when poly_color.a >= 0.5.
+// alpha_factor = 31 * 8 / 10 = 24.8
+// Calculated based on the use of the "fade" variable from the GETCOLOR macro
+// in r_data.c:R_CreateFadeColormaps.
+// However, this value caused some ugliness in fades to white (special stage entry)
+// while palette rendering is enabled, so it was raised slightly to 24.875.
+#define GLSL_UI_TINTED_WIPE_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"const float alpha_factor = 24.875;\n" \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"vec4 final_color = poly_color;\n" \
+		"float alpha = texel.a;\n" \
+		"if (final_color.a >= 0.5)\n" \
+			"alpha = 1.0 - alpha;\n" \
+		"alpha *= alpha_factor;\n" \
+		"final_color *= alpha;\n" \
+		"final_color.a = 1.0;\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\0"
+
+//
+// Generic vertex shader
+// Transforms the vertex by the modelview and projection matrices, passes color and texcoord 0 through, and sets gl_ClipVertex
+
+#define GLSL_FALLBACK_VERTEX_SHADER \
+	"void main()\n" \
+	"{\n" \
+		"gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;\n" \
+		"gl_FrontColor = gl_Color;\n" \
+		"gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" \
+		"gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;\n" \
+	"}\0"
+
+//
+// Generic fragment shader
+// Outputs the sampled texel multiplied by poly_color
+
+#define GLSL_FALLBACK_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"void main(void) {\n" \
+		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * poly_color;\n" \
+	"}\0"
+
+//
+// Software fragment shader
+// Fade step: blends final_color toward fade_color by the darkness from R_DoomLightingEquation(lighting).
+// When fade_start/fade_end are not 0/31 the darkness is rescaled to that range. NOTE(review): fd is 0 if they are equal -- confirm callers avoid that.
+#define GLSL_SOFTWARE_FADE_EQUATION \
+	"float darkness = R_DoomLightingEquation(lighting);\n" \
+	"if (fade_start != 0.0 || fade_end != 31.0) {\n" \
+		"float fs = fade_start / 31.0;\n" \
+		"float fe = fade_end / 31.0;\n" \
+		"float fd = fe - fs;\n" \
+		"darkness = clamp((darkness - fs) * (1.0 / fd), 0.0, 1.0);\n" \
+	"}\n" \
+	"final_color = mix(final_color, fade_color, darkness);\n"
+
+// Software-style fragment shader (tint + fade) that also multiplies the result by the lighting value
+// from the accompanying vertex shader (stored in gl_Color); alpha is then reset to texel.a * poly_color.a
+#define GLSL_SOFTWARE_MODEL_LIGHTING_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"uniform vec4 tint_color;\n" \
+	"uniform vec4 fade_color;\n" \
+	"uniform float lighting;\n" \
+	"uniform float fade_start;\n" \
+	"uniform float fade_end;\n" \
+	GLSL_DOOM_COLORMAP \
+	GLSL_DOOM_LIGHT_EQUATION \
+	"void main(void) {\n" \
+		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
+		"vec4 base_color = texel * poly_color;\n" \
+		"vec4 final_color = base_color;\n" \
+		GLSL_SOFTWARE_TINT_EQUATION \
+		GLSL_SOFTWARE_FADE_EQUATION \
+		"final_color *= gl_Color;\n" \
+		"final_color.a = texel.a * poly_color.a;\n" \
+		"gl_FragColor = final_color;\n" \
+	"}\0"
+
+//
+// Sky fragment shader (token-identical repeat of the GLSL_SKY_FRAGMENT_SHADER definition earlier in this header; redundant but a legal redefinition)
+// Modulates the sampled texel with both gl_Color and poly_color
+//
+#define GLSL_SKY_FRAGMENT_SHADER \
+	"uniform sampler2D tex;\n" \
+	"uniform vec4 poly_color;\n" \
+	"void main(void) {\n" \
+		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * gl_Color * poly_color;\n" \
+	"}\0"
+
+#endif
diff --git a/src/hardware/r_opengl/r_opengl.c b/src/hardware/r_opengl/r_opengl.c
index ea831e41dee3b7afff7eed47d4ead3f858870648..acd09f614318f433567a5764f2cf453d26315e9d 100644
--- a/src/hardware/r_opengl/r_opengl.c
+++ b/src/hardware/r_opengl/r_opengl.c
@@ -24,6 +24,7 @@
 #include "../../r_local.h" // For rendertimefrac, used for the leveltime shader uniform
 #include "r_opengl.h"
 #include "r_vbo.h"
+#include "../hw_shaders.h"
 
 #if defined (HWRENDER) && !defined (NOROPENGL)
 
@@ -35,12 +36,21 @@ struct GLRGBAFloat
 	GLfloat alpha;
 };
 typedef struct GLRGBAFloat GLRGBAFloat;
-static const GLubyte white[4] = { 255, 255, 255, 255 };
+
+// Node of the singly linked list of light tables (see LightTablesHead / LightTablesTail)
+struct LTListItem
+{
+	UINT32 id; // presumably the GL texture name of this light table -- TODO confirm
+	struct LTListItem *next; // next node in the list
+};
+typedef struct LTListItem LTListItem;
 
 // ==========================================================================
 //                                                                  CONSTANTS
 // ==========================================================================
 
+static const GLubyte white[4] = { 255, 255, 255, 255 };
+
 // With OpenGL 1.1+, the first texture should be 1
 static GLuint NOTEXTURE_NUM = 0;
 
@@ -56,6 +66,7 @@ static float NEAR_CLIPPING_PLANE =   NZCLIP_PLANE;
 
 
 static  GLuint      tex_downloaded  = 0;
+static  GLuint      lt_downloaded   = 0; // currently bound lighttable texture
 static  GLfloat     fov             = 90.0f;
 static  FBITFIELD   CurrentPolyFlags;
 
@@ -66,7 +77,15 @@ static FTextureInfo *TexCacheHead = NULL;
 static RGBA_t *textureBuffer = NULL;
 static size_t textureBufferSize = 0;
 
-RGBA_t  myPaletteData[256];
+// Linked list of all lighttables.
+static LTListItem *LightTablesTail = NULL;
+static LTListItem *LightTablesHead = NULL;
+
+static RGBA_t screenPalette[256] = {0}; // the palette for the postprocessing step in palette rendering
+static GLuint screenPaletteTex = 0; // 1D texture containing the screen palette
+static GLuint paletteLookupTex = 0; // 3D texture containing RGB -> palette index lookup table
+RGBA_t  myPaletteData[256]; // the palette for converting textures to RGBA
+
 GLint   screen_width    = 0;               // used by Draw2DLine()
 GLint   screen_height   = 0;
 GLbyte  screen_depth    = 0;
@@ -91,10 +110,7 @@ static GLint   viewport[4];
 //			flush all of the stored textures, leaving them unavailable at times such as between levels
 //			These need to start at 0 and be set to their number, and be reset to 0 when deleted so that intel GPUs
 //			can know when the textures aren't there, as textures are always considered resident in their virtual memory
-static GLuint screentexture = 0;
-static GLuint startScreenWipe = 0;
-static GLuint endScreenWipe = 0;
-static GLuint finalScreenTexture = 0;
+static GLuint screenTextures[NUMSCREENTEXTURES] = {0};
 
 // shortcut for ((float)1/i)
 static const GLfloat byte2float[256] = {
@@ -378,10 +394,14 @@ typedef void (APIENTRY * PFNglTexEnvi) (GLenum target, GLenum pname, GLint param
 static PFNglTexEnvi pglTexEnvi;
 typedef void (APIENTRY * PFNglTexParameteri) (GLenum target, GLenum pname, GLint param);
 static PFNglTexParameteri pglTexParameteri;
+typedef void (APIENTRY * PFNglTexImage1D) (GLenum target, GLint level, GLint internalFormat, GLsizei width, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+static PFNglTexImage1D pglTexImage1D;
 typedef void (APIENTRY * PFNglTexImage2D) (GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
 static PFNglTexImage2D pglTexImage2D;
 typedef void (APIENTRY * PFNglTexSubImage2D) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels);
 static PFNglTexSubImage2D pglTexSubImage2D;
+typedef void (APIENTRY * PFNglGetTexImage) (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels);
+static PFNglGetTexImage pglGetTexImage;
 
 /* 1.1 functions */
 /* texture objects */ //GL_EXT_texture_object
@@ -401,6 +421,10 @@ static PFNglCopyTexSubImage2D pglCopyTexSubImage2D;
 typedef GLint (APIENTRY * PFNgluBuild2DMipmaps) (GLenum target, GLint internalFormat, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *data);
 static PFNgluBuild2DMipmaps pgluBuild2DMipmaps;
 
+/* 1.2 functions for 3D textures */
+typedef void (APIENTRY * PFNglTexImage3D) (GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
+static PFNglTexImage3D pglTexImage3D;
+
 /* 1.3 functions for multitexturing */
 typedef void (APIENTRY *PFNglActiveTexture) (GLenum);
 static PFNglActiveTexture pglActiveTexture;
@@ -445,6 +469,9 @@ static PFNglBlendEquation pglBlendEquation;
 #ifndef GL_TEXTURE1
 #define GL_TEXTURE1 0x84C1
 #endif
+#ifndef GL_TEXTURE2
+#define GL_TEXTURE2 0x84C2
+#endif
 
 /* 1.5 Parms */
 #ifndef GL_ARRAY_BUFFER
@@ -517,8 +544,10 @@ boolean SetupGLfunc(void)
 
 	GETOPENGLFUNC(pglTexEnvi, glTexEnvi)
 	GETOPENGLFUNC(pglTexParameteri, glTexParameteri)
+	GETOPENGLFUNC(pglTexImage1D, glTexImage1D)
 	GETOPENGLFUNC(pglTexImage2D, glTexImage2D)
 	GETOPENGLFUNC(pglTexSubImage2D, glTexSubImage2D)
+	GETOPENGLFUNC(pglGetTexImage, glGetTexImage)
 
 	GETOPENGLFUNC(pglGenTextures, glGenTextures)
 	GETOPENGLFUNC(pglDeleteTextures, glDeleteTextures)
@@ -534,7 +563,7 @@ boolean SetupGLfunc(void)
 }
 
 static boolean gl_shadersenabled = false;
-static hwdshaderoption_t gl_allowshaders = HWD_SHADEROPTION_OFF;
+static INT32 gl_allowshaders = 0;
 
 #ifdef GL_SHADERS
 typedef GLuint 	(APIENTRY *PFNglCreateShader)		(GLenum);
@@ -592,7 +621,12 @@ typedef enum
 	gluniform_fade_start,
 	gluniform_fade_end,
 
-	// misc. (custom shaders)
+	// palette rendering
+	gluniform_palette_tex, // 1d texture containing a palette
+	gluniform_palette_lookup_tex, // 3d texture containing the rgb->index lookup table
+	gluniform_lighttable_tex, // 2d texture containing a light table
+
+	// misc.
 	gluniform_leveltime,
 
 	gluniform_max,
@@ -600,14 +634,15 @@ typedef enum
 
 typedef struct gl_shader_s
 {
+	char *vertex_shader; // GLSL vertex source held by this entry (LoadShader Z_Free's the old one when replacing it); NULL if none
+	char *fragment_shader; // GLSL fragment source, same ownership rules; NULL if none
 	GLuint program;
 	GLint uniforms[gluniform_max+1];
-	boolean custom;
 } gl_shader_t;
 
 static gl_shader_t gl_shaders[HWR_MAXSHADERS];
-static gl_shader_t gl_usershaders[HWR_MAXSHADERS];
-static shadersource_t gl_customshaders[HWR_MAXSHADERS];
+
+static gl_shader_t gl_fallback_shader;
 
 // 09102020
 typedef struct gl_shaderstate_s
@@ -623,253 +658,19 @@ static gl_shaderstate_t gl_shaderstate;
 static float shader_leveltime = 0;
 
 // Lactozilla: Shader functions
-static boolean Shader_CompileProgram(gl_shader_t *shader, GLint i, const GLchar *vert_shader, const GLchar *frag_shader);
+static boolean Shader_CompileProgram(gl_shader_t *shader, GLint i);
 static void Shader_CompileError(const char *message, GLuint program, INT32 shadernum);
 static void Shader_SetUniforms(FSurfaceInfo *Surface, GLRGBAFloat *poly, GLRGBAFloat *tint, GLRGBAFloat *fade);
 
 static GLRGBAFloat shader_defaultcolor = {1.0f, 1.0f, 1.0f, 1.0f};
 
-// ================
-//  Vertex shaders
-// ================
-
-//
-// Generic vertex shader
-//
-
-#define GLSL_DEFAULT_VERTEX_SHADER \
-	"void main()\n" \
-	"{\n" \
-		"gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;\n" \
-		"gl_FrontColor = gl_Color;\n" \
-		"gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" \
-		"gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;\n" \
-	"}\0"
-
-// replicates the way fixed function lighting is used by the model lighting option,
-// stores the lighting result to gl_Color
-// (ambient lighting of 0.75 and diffuse lighting from above)
-#define GLSL_MODEL_LIGHTING_VERTEX_SHADER \
-	"void main()\n" \
-	"{\n" \
-		"float nDotVP = dot(gl_Normal, vec3(0, 1, 0));\n" \
-		"float light = 0.75 + max(nDotVP, 0.0);\n" \
-		"gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;\n" \
-		"gl_FrontColor = vec4(light, light, light, 1.0);\n" \
-		"gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" \
-		"gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;\n" \
-	"}\0"
-
-// ==================
-//  Fragment shaders
-// ==================
-
-//
-// Generic fragment shader
-//
-
-#define GLSL_DEFAULT_FRAGMENT_SHADER \
-	"uniform sampler2D tex;\n" \
-	"uniform vec4 poly_color;\n" \
-	"void main(void) {\n" \
-		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * poly_color;\n" \
-	"}\0"
-
-//
-// Software fragment shader
-//
-
-#define GLSL_DOOM_COLORMAP \
-	"float R_DoomColormap(float light, float z)\n" \
-	"{\n" \
-		"float lightnum = clamp(light / 17.0, 0.0, 15.0);\n" \
-		"float lightz = clamp(z / 16.0, 0.0, 127.0);\n" \
-		"float startmap = (15.0 - lightnum) * 4.0;\n" \
-		"float scale = 160.0 / (lightz + 1.0);\n" \
-		"return startmap - scale * 0.5;\n" \
-	"}\n"
-
-#define GLSL_DOOM_LIGHT_EQUATION \
-	"float R_DoomLightingEquation(float light)\n" \
-	"{\n" \
-		"float z = gl_FragCoord.z / gl_FragCoord.w;\n" \
-		"float colormap = floor(R_DoomColormap(light, z)) + 0.5;\n" \
-		"return clamp(colormap, 0.0, 31.0) / 32.0;\n" \
-	"}\n"
-
-#define GLSL_SOFTWARE_TINT_EQUATION \
-	"if (tint_color.a > 0.0) {\n" \
-		"float color_bright = sqrt((base_color.r * base_color.r) + (base_color.g * base_color.g) + (base_color.b * base_color.b));\n" \
-		"float strength = sqrt(tint_color.a);\n" \
-		"final_color.r = clamp((color_bright * (tint_color.r * strength)) + (base_color.r * (1.0 - strength)), 0.0, 1.0);\n" \
-		"final_color.g = clamp((color_bright * (tint_color.g * strength)) + (base_color.g * (1.0 - strength)), 0.0, 1.0);\n" \
-		"final_color.b = clamp((color_bright * (tint_color.b * strength)) + (base_color.b * (1.0 - strength)), 0.0, 1.0);\n" \
-	"}\n"
-
-#define GLSL_SOFTWARE_FADE_EQUATION \
-	"float darkness = R_DoomLightingEquation(lighting);\n" \
-	"if (fade_start != 0.0 || fade_end != 31.0) {\n" \
-		"float fs = fade_start / 31.0;\n" \
-		"float fe = fade_end / 31.0;\n" \
-		"float fd = fe - fs;\n" \
-		"darkness = clamp((darkness - fs) * (1.0 / fd), 0.0, 1.0);\n" \
-	"}\n" \
-	"final_color = mix(final_color, fade_color, darkness);\n"
-
-#define GLSL_SOFTWARE_FRAGMENT_SHADER \
-	"uniform sampler2D tex;\n" \
-	"uniform vec4 poly_color;\n" \
-	"uniform vec4 tint_color;\n" \
-	"uniform vec4 fade_color;\n" \
-	"uniform float lighting;\n" \
-	"uniform float fade_start;\n" \
-	"uniform float fade_end;\n" \
-	GLSL_DOOM_COLORMAP \
-	GLSL_DOOM_LIGHT_EQUATION \
-	"void main(void) {\n" \
-		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
-		"vec4 base_color = texel * poly_color;\n" \
-		"vec4 final_color = base_color;\n" \
-		GLSL_SOFTWARE_TINT_EQUATION \
-		GLSL_SOFTWARE_FADE_EQUATION \
-		"final_color.a = texel.a * poly_color.a;\n" \
-		"gl_FragColor = final_color;\n" \
-	"}\0"
-
-// same as above but multiplies results with the lighting value from the
-// accompanying vertex shader (stored in gl_Color)
-#define GLSL_SOFTWARE_MODEL_LIGHTING_FRAGMENT_SHADER \
-	"uniform sampler2D tex;\n" \
-	"uniform vec4 poly_color;\n" \
-	"uniform vec4 tint_color;\n" \
-	"uniform vec4 fade_color;\n" \
-	"uniform float lighting;\n" \
-	"uniform float fade_start;\n" \
-	"uniform float fade_end;\n" \
-	GLSL_DOOM_COLORMAP \
-	GLSL_DOOM_LIGHT_EQUATION \
-	"void main(void) {\n" \
-		"vec4 texel = texture2D(tex, gl_TexCoord[0].st);\n" \
-		"vec4 base_color = texel * poly_color;\n" \
-		"vec4 final_color = base_color;\n" \
-		GLSL_SOFTWARE_TINT_EQUATION \
-		GLSL_SOFTWARE_FADE_EQUATION \
-		"final_color *= gl_Color;\n" \
-		"final_color.a = texel.a * poly_color.a;\n" \
-		"gl_FragColor = final_color;\n" \
-	"}\0"
-
-//
-// Water surface shader
-//
-// Mostly guesstimated, rather than the rest being built off Software science.
-// Still needs to distort things underneath/around the water...
-//
-
-#define GLSL_WATER_FRAGMENT_SHADER \
-	"uniform sampler2D tex;\n" \
-	"uniform vec4 poly_color;\n" \
-	"uniform vec4 tint_color;\n" \
-	"uniform vec4 fade_color;\n" \
-	"uniform float lighting;\n" \
-	"uniform float fade_start;\n" \
-	"uniform float fade_end;\n" \
-	"uniform float leveltime;\n" \
-	"const float freq = 0.025;\n" \
-	"const float amp = 0.025;\n" \
-	"const float speed = 2.0;\n" \
-	"const float pi = 3.14159;\n" \
-	GLSL_DOOM_COLORMAP \
-	GLSL_DOOM_LIGHT_EQUATION \
-	"void main(void) {\n" \
-		"float z = (gl_FragCoord.z / gl_FragCoord.w) / 2.0;\n" \
-		"float a = -pi * (z * freq) + (leveltime * speed);\n" \
-		"float sdistort = sin(a) * amp;\n" \
-		"float cdistort = cos(a) * amp;\n" \
-		"vec4 texel = texture2D(tex, vec2(gl_TexCoord[0].s - sdistort, gl_TexCoord[0].t - cdistort));\n" \
-		"vec4 base_color = texel * poly_color;\n" \
-		"vec4 final_color = base_color;\n" \
-		GLSL_SOFTWARE_TINT_EQUATION \
-		GLSL_SOFTWARE_FADE_EQUATION \
-		"final_color.a = texel.a * poly_color.a;\n" \
-		"gl_FragColor = final_color;\n" \
-	"}\0"
-
-//
-// Fog block shader
-//
-// Alpha of the planes themselves are still slightly off -- see HWR_FogBlockAlpha
-//
-
-#define GLSL_FOG_FRAGMENT_SHADER \
-	"uniform vec4 tint_color;\n" \
-	"uniform vec4 fade_color;\n" \
-	"uniform float lighting;\n" \
-	"uniform float fade_start;\n" \
-	"uniform float fade_end;\n" \
-	GLSL_DOOM_COLORMAP \
-	GLSL_DOOM_LIGHT_EQUATION \
-	"void main(void) {\n" \
-		"vec4 base_color = gl_Color;\n" \
-		"vec4 final_color = base_color;\n" \
-		GLSL_SOFTWARE_TINT_EQUATION \
-		GLSL_SOFTWARE_FADE_EQUATION \
-		"gl_FragColor = final_color;\n" \
-	"}\0"
-
-//
-// Sky fragment shader
-// Modulates poly_color with gl_Color
-//
-#define GLSL_SKY_FRAGMENT_SHADER \
-	"uniform sampler2D tex;\n" \
-	"uniform vec4 poly_color;\n" \
-	"void main(void) {\n" \
-		"gl_FragColor = texture2D(tex, gl_TexCoord[0].st) * gl_Color * poly_color;\n" \
-	"}\0"
-
-// ================
-//  Shader sources
-// ================
-
-static struct {
-	const char *vertex;
-	const char *fragment;
-} const gl_shadersources[] = {
-	// Default shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_DEFAULT_FRAGMENT_SHADER},
-
-	// Floor shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SOFTWARE_FRAGMENT_SHADER},
-
-	// Wall shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SOFTWARE_FRAGMENT_SHADER},
-
-	// Sprite shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SOFTWARE_FRAGMENT_SHADER},
-
-	// Model shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SOFTWARE_FRAGMENT_SHADER},
-
-	// Model shader + diffuse lighting from above
-	{GLSL_MODEL_LIGHTING_VERTEX_SHADER, GLSL_SOFTWARE_MODEL_LIGHTING_FRAGMENT_SHADER},
-
-	// Water shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_WATER_FRAGMENT_SHADER},
-
-	// Fog shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_FOG_FRAGMENT_SHADER},
-
-	// Sky shader
-	{GLSL_DEFAULT_VERTEX_SHADER, GLSL_SKY_FRAGMENT_SHADER},
-
-	{NULL, NULL},
-};
-
 #endif	// GL_SHADERS
 
 void SetupGLFunc4(void)
 {
+	/* 1.2 funcs */
+	pglTexImage3D = GetGLFunc("glTexImage3D");
+	/* 1.3 funcs */
 	pglActiveTexture = GetGLFunc("glActiveTexture");
 	pglMultiTexCoord2f = GetGLFunc("glMultiTexCoord2f");
 	pglClientActiveTexture = GetGLFunc("glClientActiveTexture");
@@ -912,59 +713,75 @@ void SetupGLFunc4(void)
 	pgluBuild2DMipmaps = GetGLFunc("gluBuild2DMipmaps");
 }
 
-EXPORT boolean HWRAPI(CompileShaders) (void)
+EXPORT boolean HWRAPI(InitShaders) (void) // returns false if shaders are unavailable or the fallback program fails to build
 {
 #ifdef GL_SHADERS
-	GLint i;
-
 	if (!pglUseProgram)
 		return false;
+	// Build the fallback program; SetShader substitutes it for any slot whose program is 0.
+	gl_fallback_shader.vertex_shader = Z_StrDup(GLSL_FALLBACK_VERTEX_SHADER);
+	gl_fallback_shader.fragment_shader = Z_StrDup(GLSL_FALLBACK_FRAGMENT_SHADER);
 
-	gl_customshaders[SHADER_DEFAULT].vertex = NULL;
-	gl_customshaders[SHADER_DEFAULT].fragment = NULL;
-
-	for (i = 0; gl_shadersources[i].vertex && gl_shadersources[i].fragment; i++)
+	if (!Shader_CompileProgram(&gl_fallback_shader, -1))
 	{
-		gl_shader_t *shader, *usershader;
-		const GLchar *vert_shader = gl_shadersources[i].vertex;
-		const GLchar *frag_shader = gl_shadersources[i].fragment;
+		GL_MSG_Error("Failed to compile the fallback shader program!\n");
+		return false;
+	}
 
-		if (i >= HWR_MAXSHADERS)
-			break;
+	return true;
+#else
+	return false;
+#endif
+}
 
-		shader = &gl_shaders[i];
-		usershader = &gl_usershaders[i];
+EXPORT void HWRAPI(LoadShader) (int slot, char *code, hwdshaderstage_t stage) // stores code itself (no copy) as the slot's source for stage
+{
+#ifdef GL_SHADERS
+	gl_shader_t *shader;
 
-		if (shader->program)
-			pglDeleteProgram(shader->program);
-		if (usershader->program)
-			pglDeleteProgram(usershader->program);
+	if (slot < 0 || slot >= HWR_MAXSHADERS)
+		I_Error("LoadShader: Invalid slot %d", slot);
 
-		shader->program = 0;
-		usershader->program = 0;
+	shader = &gl_shaders[slot]; // the slot takes ownership of code: a later LoadShader releases it with Z_Free
 
-		if (!Shader_CompileProgram(shader, i, vert_shader, frag_shader))
-			shader->program = 0;
+#define LOADSHADER(source) { \
+	if (shader->source) \
+		Z_Free(shader->source); \
+	shader->source = code; \
+	}
 
-		// Compile custom shader
-		if ((i == SHADER_DEFAULT) || !(gl_customshaders[i].vertex || gl_customshaders[i].fragment))
-			continue;
+	if (stage == HWD_SHADERSTAGE_VERTEX)
+		LOADSHADER(vertex_shader)
+	else if (stage == HWD_SHADERSTAGE_FRAGMENT)
+		LOADSHADER(fragment_shader)
+	else
+		I_Error("LoadShader: invalid shader stage");
 
-		// 18032019
-		if (gl_customshaders[i].vertex)
-			vert_shader = gl_customshaders[i].vertex;
-		if (gl_customshaders[i].fragment)
-			frag_shader = gl_customshaders[i].fragment;
+#undef LOADSHADER
+#else
+	(void)slot;
+	(void)code;
+	(void)stage;
+#endif
+}
 
-		if (!Shader_CompileProgram(usershader, i, vert_shader, frag_shader))
-		{
-			GL_MSG_Warning("CompileShaders: Could not compile custom shader program for %s\n", HWR_GetShaderName(i));
-			usershader->program = 0;
-		}
-	}
+EXPORT boolean HWRAPI(CompileShader) (int slot) // (re)builds the program for slot from its loaded sources; true on success
+{
+#ifdef GL_SHADERS
+	if (slot < 0 || slot >= HWR_MAXSHADERS)
+		I_Error("CompileShader: Invalid slot %d", slot);
 
-	return true;
+	if (Shader_CompileProgram(&gl_shaders[slot], slot))
+	{
+		return true;
+	}
+	else
+	{
+		gl_shaders[slot].program = 0; // program 0 marks the slot unusable, so SetShader picks the fallback shader
+		return false;
+	}
 #else
+	(void)slot;
 	return false;
 #endif
 }
@@ -991,90 +808,36 @@ EXPORT void HWRAPI(SetShaderInfo) (hwdshaderinfo_t info, INT32 value)
 #endif
 }
 
-//
-// Custom shader loading
-//
-EXPORT void HWRAPI(LoadCustomShader) (int number, char *code, size_t size, boolean isfragment)
+EXPORT void HWRAPI(SetShader) (int slot) // selects the program of slot (fallback if it has none); SHADER_NONE unsets
 {
 #ifdef GL_SHADERS
-	shadersource_t *shader;
-
-	if (!pglUseProgram)
-		return;
-
-	if (number < 1 || number > HWR_MAXSHADERS)
-		I_Error("LoadCustomShader: cannot load shader %d (min 1, max %d)", number, HWR_MAXSHADERS);
-	else if (code == NULL)
-		I_Error("LoadCustomShader: empty shader");
-
-	shader = &gl_customshaders[number];
-
-#define COPYSHADER(source) { \
-	if (shader->source) \
-		free(shader->source); \
-	shader->source = malloc(size+1); \
-	strncpy(shader->source, code, size); \
-	shader->source[size] = 0; \
-	}
-
-	if (isfragment)
-		COPYSHADER(fragment)
-	else
-		COPYSHADER(vertex)
-
-#else
-	(void)number;
-	(void)shader;
-	(void)size;
-	(void)fragment;
-#endif
-}
-
-EXPORT void HWRAPI(SetShader) (int type)
-{
-#ifdef GL_SHADERS
-	if (type == SHADER_NONE)
+	if (slot == SHADER_NONE)
 	{
 		UnSetShader();
 		return;
 	}
-
-	if (gl_allowshaders != HWD_SHADEROPTION_OFF)
+	if (gl_allowshaders)
 	{
-		gl_shader_t *shader = gl_shaderstate.current;
+		gl_shader_t *next_shader = &gl_shaders[slot]; // the gl_shader_t we are going to switch to; NOTE(review): slot is not range-checked here, unlike LoadShader/CompileShader
 
-		// If using model lighting, set the appropriate shader.
-		// However don't override a custom shader.
-		if (type == SHADER_MODEL && model_lighting
-		&& !(gl_shaders[SHADER_MODEL].custom && !gl_shaders[SHADER_MODEL_LIGHTING].custom))
-			type = SHADER_MODEL_LIGHTING;
+		if (!next_shader->program)
+			next_shader = &gl_fallback_shader; // unusable shader, use fallback instead
 
-		if ((shader == NULL) || (GLuint)type != gl_shaderstate.type)
+		// update gl_shaderstate if a shader switch is needed, or if the current shader was recompiled into a new program
+		if (gl_shaderstate.current != next_shader || gl_shaderstate.program != next_shader->program)
 		{
-			gl_shader_t *baseshader = &gl_shaders[type];
-			gl_shader_t *usershader = &gl_usershaders[type];
-
-			if (usershader->program)
-				shader = (gl_allowshaders == HWD_SHADEROPTION_NOCUSTOM) ? baseshader : usershader;
-			else
-				shader = baseshader;
-
-			gl_shaderstate.current = shader;
-			gl_shaderstate.type = type;
+			gl_shaderstate.current = next_shader;
+			gl_shaderstate.program = next_shader->program;
+			gl_shaderstate.type = slot;
 			gl_shaderstate.changed = true;
 		}
 
-		if (gl_shaderstate.program != shader->program)
-		{
-			gl_shaderstate.program = shader->program;
-			gl_shaderstate.changed = true;
-		}
+		gl_shadersenabled = true;
 
-		gl_shadersenabled = (shader->program != 0);
 		return;
 	}
 #else
-	(void)type;
+	(void)slot;
 #endif
 	gl_shadersenabled = false;
 }
@@ -1082,36 +845,20 @@ EXPORT void HWRAPI(SetShader) (int type)
 EXPORT void HWRAPI(UnSetShader) (void)
 {
 #ifdef GL_SHADERS
-	gl_shaderstate.current = NULL;
-	gl_shaderstate.type = 0;
-	gl_shaderstate.program = 0;
+	if (gl_shadersenabled) // only reset the tracked state and call glUseProgram(0) when shaders are currently enabled
+	{
+		gl_shaderstate.current = NULL;
+		gl_shaderstate.type = 0;
+		gl_shaderstate.program = 0;
 
-	if (pglUseProgram)
-		pglUseProgram(0);
+		if (pglUseProgram)
+			pglUseProgram(0);
+	}
 #endif
 
 	gl_shadersenabled = false;
 }
 
-EXPORT void HWRAPI(CleanShaders) (void)
-{
-	INT32 i;
-
-	for (i = 1; i < HWR_MAXSHADERS; i++)
-	{
-		shadersource_t *shader = &gl_customshaders[i];
-
-		if (shader->vertex)
-			free(shader->vertex);
-
-		if (shader->fragment)
-			free(shader->fragment);
-
-		shader->vertex = NULL;
-		shader->fragment = NULL;
-	}
-}
-
 // -----------------+
 // SetNoTexture     : Disable texture
 // -----------------+
@@ -1407,55 +1154,38 @@ EXPORT void HWRAPI(ClearMipMapCache) (void)
 }
 
 
-// -----------------+
-// ReadRect         : Read a rectangle region of the truecolor framebuffer
-//                  : store pixels as 16bit 565 RGB
-// Returns          : 16bit 565 RGB pixel array stored in dst_data
-// -----------------+
-EXPORT void HWRAPI(ReadRect) (INT32 x, INT32 y, INT32 width, INT32 height,
-                                INT32 dst_stride, UINT16 * dst_data)
+// Writes screen texture tex into dst_data (dst_data is left untouched if the temporary row buffer cannot be allocated).
+// Pixel format is tightly packed 24-bit RGB. Row order is top to bottom.
+// Dimensions are screen_width * screen_height, so dst_data needs room for screen_width * screen_height * 3 bytes.
+EXPORT void HWRAPI(ReadScreenTexture) (int tex, UINT8 *dst_data)
 {
 	INT32 i;
-	// GL_DBG_Printf ("ReadRect()\n");
-	if (dst_stride == width*3)
-	{
-		GLubyte*top = (GLvoid*)dst_data, *bottom = top + dst_stride * (height - 1);
-		GLubyte *row = malloc(dst_stride);
-		if (!row) return;
-		pglPixelStorei(GL_PACK_ALIGNMENT, 1);
-		pglReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, dst_data);
-		pglPixelStorei(GL_UNPACK_ALIGNMENT, 1);
-		for(i = 0; i < height/2; i++)
-		{
-			memcpy(row, top, dst_stride);
-			memcpy(top, bottom, dst_stride);
-			memcpy(bottom, row, dst_stride);
-			top += dst_stride;
-			bottom -= dst_stride;
-		}
-		free(row);
-	}
-	else
-	{
-		INT32 j;
-		GLubyte *image = malloc(width*height*3*sizeof (*image));
-		if (!image) return;
-		pglPixelStorei(GL_PACK_ALIGNMENT, 1);
-		pglReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, image);
-		pglPixelStorei(GL_UNPACK_ALIGNMENT, 1);
-		for (i = height-1; i >= 0; i--)
-		{
-			for (j = 0; j < width; j++)
-			{
-				dst_data[(height-1-i)*width+j] =
-				(UINT16)(
-				                 ((image[(i*width+j)*3]>>3)<<11) |
-				                 ((image[(i*width+j)*3+1]>>2)<<5) |
-				                 ((image[(i*width+j)*3+2]>>3)));
-			}
-		}
-		free(image);
-	}
+	int dst_stride = screen_width * 3; // stride between rows of image data
+	GLubyte*top = (GLvoid*)dst_data, *bottom = top + dst_stride * (screen_height - 1);
+	GLubyte *row;
+	row = malloc(dst_stride);
+	if (!row) return;
+	// at the time this function is called, generic2 can be found drawn on the framebuffer
+	// if some other screen texture is needed, draw it to the framebuffer
+	// and draw generic2 back after reading the framebuffer.
+	// this hack is for some reason **much** faster than the simple solution of using glGetTexImage.
+	if (tex != HWD_SCREENTEXTURE_GENERIC2)
+		DrawScreenTexture(tex, NULL, 0);
+	pglPixelStorei(GL_PACK_ALIGNMENT, 1);
+	pglReadPixels(0, 0, screen_width, screen_height, GL_RGB, GL_UNSIGNED_BYTE, dst_data);
+	if (tex != HWD_SCREENTEXTURE_GENERIC2)
+		DrawScreenTexture(HWD_SCREENTEXTURE_GENERIC2, NULL, 0);
+	// Flip image upside down.
+	// In other words, convert OpenGL's "bottom->top" row order into "top->bottom".
+	for(i = 0; i < screen_height/2; i++)
+	{
+		memcpy(row, top, dst_stride);
+		memcpy(top, bottom, dst_stride);
+		memcpy(bottom, row, dst_stride);
+		top += dst_stride;
+		bottom -= dst_stride;
+	}
+	free(row);
 }
 
 
@@ -2071,69 +1801,91 @@ static void Shader_SetUniforms(FSurfaceInfo *Surface, GLRGBAFloat *poly, GLRGBAF
 #endif
 }
 
-static boolean Shader_CompileProgram(gl_shader_t *shader, GLint i, const GLchar *vert_shader, const GLchar *frag_shader)
+static boolean Shader_CompileProgram(gl_shader_t *shader, GLint i) // builds shader->program from shader's stored sources; i is only used to name the shader in messages
 {
-	GLuint gl_vertShader, gl_fragShader;
+	GLuint gl_vertShader = 0;
+	GLuint gl_fragShader = 0;
 	GLint result;
+	const GLchar *vert_shader = shader->vertex_shader;
+	const GLchar *frag_shader = shader->fragment_shader;
 
-	//
-	// Load and compile vertex shader
-	//
-	gl_vertShader = pglCreateShader(GL_VERTEX_SHADER);
-	if (!gl_vertShader)
+	if (shader->program)
+		pglDeleteProgram(shader->program);
+	shader->program = 0; // the old id is dead now; never leave it behind when one of the early failure returns is taken
+	if (!vert_shader && !frag_shader)
 	{
-		GL_MSG_Error("Shader_CompileProgram: Error creating vertex shader %s\n", HWR_GetShaderName(i));
+		GL_MSG_Error("Shader_CompileProgram: Missing shaders for shader program %s\n", HWR_GetShaderName(i));
 		return false;
 	}
 
-	pglShaderSource(gl_vertShader, 1, &vert_shader, NULL);
-	pglCompileShader(gl_vertShader);
-
-	// check for compile errors
-	pglGetShaderiv(gl_vertShader, GL_COMPILE_STATUS, &result);
-	if (result == GL_FALSE)
+	if (vert_shader)
 	{
-		Shader_CompileError("Error compiling vertex shader", gl_vertShader, i);
-		pglDeleteShader(gl_vertShader);
-		return false;
+		//
+		// Load and compile vertex shader
+		//
+		gl_vertShader = pglCreateShader(GL_VERTEX_SHADER);
+		if (!gl_vertShader)
+		{
+			GL_MSG_Error("Shader_CompileProgram: Error creating vertex shader %s\n", HWR_GetShaderName(i));
+			return false;
+		}
+
+		pglShaderSource(gl_vertShader, 1, &vert_shader, NULL);
+		pglCompileShader(gl_vertShader);
+
+		// check for compile errors
+		pglGetShaderiv(gl_vertShader, GL_COMPILE_STATUS, &result);
+		if (result == GL_FALSE)
+		{
+			Shader_CompileError("Error compiling vertex shader", gl_vertShader, i);
+			pglDeleteShader(gl_vertShader);
+			return false;
+		}
 	}
 
-	//
-	// Load and compile fragment shader
-	//
-	gl_fragShader = pglCreateShader(GL_FRAGMENT_SHADER);
-	if (!gl_fragShader)
+	if (frag_shader)
 	{
-		GL_MSG_Error("Shader_CompileProgram: Error creating fragment shader %s\n", HWR_GetShaderName(i));
-		pglDeleteShader(gl_vertShader);
-		pglDeleteShader(gl_fragShader);
-		return false;
-	}
+		//
+		// Load and compile fragment shader
+		//
+		gl_fragShader = pglCreateShader(GL_FRAGMENT_SHADER);
+		if (!gl_fragShader)
+		{
+			GL_MSG_Error("Shader_CompileProgram: Error creating fragment shader %s\n", HWR_GetShaderName(i));
+			pglDeleteShader(gl_vertShader);
+			pglDeleteShader(gl_fragShader);
+			return false;
+		}
 
-	pglShaderSource(gl_fragShader, 1, &frag_shader, NULL);
-	pglCompileShader(gl_fragShader);
+		pglShaderSource(gl_fragShader, 1, &frag_shader, NULL);
+		pglCompileShader(gl_fragShader);
 
-	// check for compile errors
-	pglGetShaderiv(gl_fragShader, GL_COMPILE_STATUS, &result);
-	if (result == GL_FALSE)
-	{
-		Shader_CompileError("Error compiling fragment shader", gl_fragShader, i);
-		pglDeleteShader(gl_vertShader);
-		pglDeleteShader(gl_fragShader);
-		return false;
+		// check for compile errors
+		pglGetShaderiv(gl_fragShader, GL_COMPILE_STATUS, &result);
+		if (result == GL_FALSE)
+		{
+			Shader_CompileError("Error compiling fragment shader", gl_fragShader, i);
+			pglDeleteShader(gl_vertShader);
+			pglDeleteShader(gl_fragShader);
+			return false;
+		}
 	}
 
 	shader->program = pglCreateProgram();
-	pglAttachShader(shader->program, gl_vertShader);
-	pglAttachShader(shader->program, gl_fragShader);
+	if (vert_shader)
+		pglAttachShader(shader->program, gl_vertShader);
+	if (frag_shader)
+		pglAttachShader(shader->program, gl_fragShader);
 	pglLinkProgram(shader->program);
 
 	// check link status
 	pglGetProgramiv(shader->program, GL_LINK_STATUS, &result);
 
 	// delete the shader objects
-	pglDeleteShader(gl_vertShader);
-	pglDeleteShader(gl_fragShader);
+	if (vert_shader)
+		pglDeleteShader(gl_vertShader);
+	if (frag_shader)
+		pglDeleteShader(gl_fragShader);
 
 	// couldn't link?
 	if (result != GL_TRUE)
@@ -2154,11 +1906,31 @@ static boolean Shader_CompileProgram(gl_shader_t *shader, GLint i, const GLchar
 	shader->uniforms[gluniform_fade_start] = GETUNI("fade_start");
 	shader->uniforms[gluniform_fade_end] = GETUNI("fade_end");
 
-	// misc. (custom shaders)
-	shader->uniforms[gluniform_leveltime] = GETUNI("leveltime");
+	// palette rendering
+	shader->uniforms[gluniform_palette_tex] = GETUNI("palette_tex");
+	shader->uniforms[gluniform_palette_lookup_tex] = GETUNI("palette_lookup_tex");
+	shader->uniforms[gluniform_lighttable_tex] = GETUNI("lighttable_tex");
 
+	// misc.
+	shader->uniforms[gluniform_leveltime] = GETUNI("leveltime");
 #undef GETUNI
 
+	// set permanent uniform values (a location of -1 means the program has no such active uniform, so it is skipped)
+#define UNIFORM_1(uniform, a, function) \
+	if (uniform != -1) \
+		function (uniform, a);
+
+	pglUseProgram(shader->program);
+
+	// texture unit numbers for the samplers used for palette rendering (palette_tex and lighttable_tex share unit 2)
+	UNIFORM_1(shader->uniforms[gluniform_palette_tex], 2, pglUniform1i);
+	UNIFORM_1(shader->uniforms[gluniform_palette_lookup_tex], 1, pglUniform1i);
+	UNIFORM_1(shader->uniforms[gluniform_lighttable_tex], 2, pglUniform1i);
+
+	// restore gl shader state
+	pglUseProgram(gl_shaderstate.program);
+#undef UNIFORM_1
+
 	return true;
 }
 
@@ -2182,6 +1954,7 @@ static void Shader_CompileError(const char *message, GLuint program, INT32 shade
 }
 
 // code that is common between DrawPolygon and DrawIndexedTriangles
+// DrawScreenTexture can also use this function for fancier screen texture drawing
 // the corona thing is there too, i have no idea if that stuff works with DrawIndexedTriangles and batching
 static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD PolyFlags)
 {
@@ -2221,6 +1994,14 @@ static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD
 			fade.green = byte2float[pSurf->FadeColor.s.green];
 			fade.blue  = byte2float[pSurf->FadeColor.s.blue];
 			fade.alpha = byte2float[pSurf->FadeColor.s.alpha];
+
+			if (pSurf->LightTableId && pSurf->LightTableId != lt_downloaded)
+			{
+				pglActiveTexture(GL_TEXTURE2);
+				pglBindTexture(GL_TEXTURE_2D, pSurf->LightTableId);
+				pglActiveTexture(GL_TEXTURE0);
+				lt_downloaded = pSurf->LightTableId;
+			}
 		}
 	}
 
@@ -2413,9 +2194,6 @@ EXPORT void HWRAPI(RenderSkyDome) (gl_sky_t *sky)
 	pglDisableClientState(GL_COLOR_ARRAY);
 }
 
-// ==========================================================================
-//
-// ==========================================================================
 EXPORT void HWRAPI(SetSpecialState) (hwdspecialstate_t IdState, INT32 Value)
 {
 	switch (IdState)
@@ -2425,7 +2203,7 @@ EXPORT void HWRAPI(SetSpecialState) (hwdspecialstate_t IdState, INT32 Value)
 			break;
 
 		case HWD_SET_SHADERS:
-			gl_allowshaders = (hwdshaderoption_t)Value;
+			gl_allowshaders = Value;
 			break;
 
 		case HWD_SET_TEXTUREFILTERMODE:
@@ -2784,6 +2562,14 @@ static void DrawModelEx(model_t *model, INT32 frameIndex, float duration, float
 	else if (Surface->PolyColor.s.alpha == 0xFF)
 		flags |= (PF_Occlude | PF_Masked);
 
+	if (Surface->LightTableId && Surface->LightTableId != lt_downloaded)
+	{
+		pglActiveTexture(GL_TEXTURE2);
+		pglBindTexture(GL_TEXTURE_2D, Surface->LightTableId);
+		pglActiveTexture(GL_TEXTURE0);
+		lt_downloaded = Surface->LightTableId;
+	}
+
 	SetBlend(flags);
 	Shader_SetUniforms(Surface, &poly, &tint, &fade);
 
@@ -3071,7 +2857,7 @@ EXPORT void HWRAPI(PostImgRedraw) (float points[SCREENVERTS][SCREENVERTS][2])
 	INT32 x, y;
 	float float_x, float_y, float_nextx, float_nexty;
 	float xfix, yfix;
-	INT32 texsize = 2048;
+	INT32 texsize = 512;
 
 	const float blackBack[16] =
 	{
@@ -3081,11 +2867,9 @@ EXPORT void HWRAPI(PostImgRedraw) (float points[SCREENVERTS][SCREENVERTS][2])
 		16.0f, -16.0f, 6.0f
 	};
 
-	// Use a power of two texture, dammit
-	if(screen_width <= 1024)
-		texsize = 1024;
-	if(screen_width <= 512)
-		texsize = 512;
+	// look for power of two that is large enough for the screen
+	while (texsize < screen_width || texsize < screen_height)
+		texsize <<= 1;
 
 	// X/Y stretch fix for all resolutions(!)
 	xfix = (float)(texsize)/((float)((screen_width)/(float)(SCREENVERTS-1)));
@@ -3159,84 +2943,16 @@ EXPORT void HWRAPI(PostImgRedraw) (float points[SCREENVERTS][SCREENVERTS][2])
 //			a new size
 EXPORT void HWRAPI(FlushScreenTextures) (void)
 {
-	pglDeleteTextures(1, &screentexture);
-	pglDeleteTextures(1, &startScreenWipe);
-	pglDeleteTextures(1, &endScreenWipe);
-	pglDeleteTextures(1, &finalScreenTexture);
-	screentexture = 0;
-	startScreenWipe = 0;
-	endScreenWipe = 0;
-	finalScreenTexture = 0;
-}
-
-// Create Screen to fade from
-EXPORT void HWRAPI(StartScreenWipe) (void)
-{
-	INT32 texsize = 2048;
-	boolean firstTime = (startScreenWipe == 0);
-
-	// Use a power of two texture, dammit
-	if(screen_width <= 512)
-		texsize = 512;
-	else if(screen_width <= 1024)
-		texsize = 1024;
-
-	// Create screen texture
-	if (firstTime)
-		pglGenTextures(1, &startScreenWipe);
-	pglBindTexture(GL_TEXTURE_2D, startScreenWipe);
-
-	if (firstTime)
-	{
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-		Clamp2D(GL_TEXTURE_WRAP_S);
-		Clamp2D(GL_TEXTURE_WRAP_T);
-		pglCopyTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 0, 0, texsize, texsize, 0);
-	}
-	else
-		pglCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, texsize, texsize);
-
-	tex_downloaded = startScreenWipe;
+	int i;
+	pglDeleteTextures(NUMSCREENTEXTURES, screenTextures); // delete every screen texture in a single call
+	for (i = 0; i < NUMSCREENTEXTURES; i++)
+		screenTextures[i] = 0; // a zero id makes MakeScreenTexture treat the slot as first-time and recreate it
 }
 
-// Create Screen to fade to
-EXPORT void HWRAPI(EndScreenWipe)(void)
-{
-	INT32 texsize = 2048;
-	boolean firstTime = (endScreenWipe == 0);
-
-	// Use a power of two texture, dammit
-	if(screen_width <= 512)
-		texsize = 512;
-	else if(screen_width <= 1024)
-		texsize = 1024;
-
-	// Create screen texture
-	if (firstTime)
-		pglGenTextures(1, &endScreenWipe);
-	pglBindTexture(GL_TEXTURE_2D, endScreenWipe);
-
-	if (firstTime)
-	{
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-		Clamp2D(GL_TEXTURE_WRAP_S);
-		Clamp2D(GL_TEXTURE_WRAP_T);
-		pglCopyTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 0, 0, texsize, texsize, 0);
-	}
-	else
-		pglCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, texsize, texsize);
-
-	tex_downloaded = endScreenWipe;
-}
-
-
-// Draw the last scene under the intermission
-EXPORT void HWRAPI(DrawIntermissionBG)(void)
+EXPORT void HWRAPI(DrawScreenTexture)(int tex, FSurfaceInfo *surf, FBITFIELD polyflags)
 {
 	float xfix, yfix;
-	INT32 texsize = 2048;
+	INT32 texsize = 512;
 
 	const float screenVerts[12] =
 	{
@@ -3248,10 +2964,9 @@ EXPORT void HWRAPI(DrawIntermissionBG)(void)
 
 	float fix[8];
 
-	if(screen_width <= 1024)
-		texsize = 1024;
-	if(screen_width <= 512)
-		texsize = 512;
+	// look for power of two that is large enough for the screen
+	while (texsize < screen_width || texsize < screen_height)
+		texsize <<= 1;
 
 	xfix = 1/((float)(texsize)/((float)((screen_width))));
 	yfix = 1/((float)(texsize)/((float)((screen_height))));
@@ -3270,20 +2985,23 @@ EXPORT void HWRAPI(DrawIntermissionBG)(void)
 
 	pglClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
 
-	pglBindTexture(GL_TEXTURE_2D, screentexture);
-	pglColor4ubv(white);
+	pglBindTexture(GL_TEXTURE_2D, screenTextures[tex]);
+	PreparePolygon(surf, NULL, surf ? polyflags : (PF_NoDepthTest));
+	if (!surf)
+		pglColor4ubv(white);
 
 	pglTexCoordPointer(2, GL_FLOAT, 0, fix);
 	pglVertexPointer(3, GL_FLOAT, 0, screenVerts);
 	pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
-	tex_downloaded = screentexture;
+	tex_downloaded = screenTextures[tex];
 }
 
 // Do screen fades!
-EXPORT void HWRAPI(DoScreenWipe)(void)
+EXPORT void HWRAPI(DoScreenWipe)(int wipeStart, int wipeEnd, FSurfaceInfo *surf,
+		FBITFIELD polyFlags)
 {
-	INT32 texsize = 2048;
+	INT32 texsize = 512;
 	float xfix, yfix;
 
 	INT32 fademaskdownloaded = tex_downloaded; // the fade mask that has been set
@@ -3306,11 +3024,15 @@ EXPORT void HWRAPI(DoScreenWipe)(void)
 		1.0f, 1.0f
 	};
 
-	// Use a power of two texture, dammit
-	if(screen_width <= 1024)
-		texsize = 1024;
-	if(screen_width <= 512)
-		texsize = 512;
+	int firstScreen;
+	if (surf && surf->PolyColor.s.alpha == 255)
+		firstScreen = wipeEnd; // it's a tinted fade-in, we need wipeEnd
+	else
+		firstScreen = wipeStart;
+
+	// look for power of two that is large enough for the screen
+	while (texsize < screen_width || texsize < screen_height)
+		texsize <<= 1;
 
 	xfix = 1/((float)(texsize)/((float)((screen_width))));
 	yfix = 1/((float)(texsize)/((float)((screen_height))));
@@ -3332,91 +3054,71 @@ EXPORT void HWRAPI(DoScreenWipe)(void)
 	SetBlend(PF_Modulated|PF_NoDepthTest);
 	pglEnable(GL_TEXTURE_2D);
 
-	// Draw the original screen
-	pglBindTexture(GL_TEXTURE_2D, startScreenWipe);
+	pglBindTexture(GL_TEXTURE_2D, screenTextures[firstScreen]);
 	pglColor4ubv(white);
 	pglTexCoordPointer(2, GL_FLOAT, 0, fix);
 	pglVertexPointer(3, GL_FLOAT, 0, screenVerts);
 	pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
-	SetBlend(PF_Modulated|PF_Translucent|PF_NoDepthTest);
+	if (surf)
+	{
+		// Draw fade mask to screen using surf and polyFlags
+		// Used for colormap/tinted wipes.
+		pglBindTexture(GL_TEXTURE_2D, fademaskdownloaded);
+		pglTexCoordPointer(2, GL_FLOAT, 0, defaultST);
+		pglVertexPointer(3, GL_FLOAT, 0, screenVerts);
+		PreparePolygon(surf, NULL, polyFlags);
+		pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+	}
+	else // Blend wipeEnd into screen with the fade mask
+	{
+		SetBlend(PF_Modulated|PF_Translucent|PF_NoDepthTest);
 
-	// Draw the end screen that fades in
-	pglActiveTexture(GL_TEXTURE0);
-	pglEnable(GL_TEXTURE_2D);
-	pglBindTexture(GL_TEXTURE_2D, endScreenWipe);
-	pglTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+		// Draw the end screen that fades in
+		pglActiveTexture(GL_TEXTURE0);
+		pglEnable(GL_TEXTURE_2D);
+		pglBindTexture(GL_TEXTURE_2D, screenTextures[wipeEnd]);
+		pglTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
 
-	pglActiveTexture(GL_TEXTURE1);
-	pglEnable(GL_TEXTURE_2D);
-	pglBindTexture(GL_TEXTURE_2D, fademaskdownloaded);
+		pglActiveTexture(GL_TEXTURE1);
+		pglEnable(GL_TEXTURE_2D);
+		pglBindTexture(GL_TEXTURE_2D, fademaskdownloaded);
 
-	pglTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
+		pglTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
 
-	// const float defaultST[8]
+		// const float defaultST[8]
 
-	pglClientActiveTexture(GL_TEXTURE0);
-	pglTexCoordPointer(2, GL_FLOAT, 0, fix);
-	pglVertexPointer(3, GL_FLOAT, 0, screenVerts);
-	pglClientActiveTexture(GL_TEXTURE1);
-	pglEnableClientState(GL_TEXTURE_COORD_ARRAY);
-	pglTexCoordPointer(2, GL_FLOAT, 0, defaultST);
-	pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+		pglClientActiveTexture(GL_TEXTURE0);
+		pglTexCoordPointer(2, GL_FLOAT, 0, fix);
+		pglVertexPointer(3, GL_FLOAT, 0, screenVerts);
+		pglClientActiveTexture(GL_TEXTURE1);
+		pglEnableClientState(GL_TEXTURE_COORD_ARRAY);
+		pglTexCoordPointer(2, GL_FLOAT, 0, defaultST);
+		pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
-	pglDisable(GL_TEXTURE_2D); // disable the texture in the 2nd texture unit
-	pglDisableClientState(GL_TEXTURE_COORD_ARRAY);
+		pglDisable(GL_TEXTURE_2D); // disable the texture in the 2nd texture unit
+		pglDisableClientState(GL_TEXTURE_COORD_ARRAY);
 
-	pglActiveTexture(GL_TEXTURE0);
-	pglClientActiveTexture(GL_TEXTURE0);
-	tex_downloaded = endScreenWipe;
-}
-
-// Create a texture from the screen.
-EXPORT void HWRAPI(MakeScreenTexture) (void)
-{
-	INT32 texsize = 2048;
-	boolean firstTime = (screentexture == 0);
-
-	// Use a power of two texture, dammit
-	if(screen_width <= 512)
-		texsize = 512;
-	else if(screen_width <= 1024)
-		texsize = 1024;
-
-	// Create screen texture
-	if (firstTime)
-		pglGenTextures(1, &screentexture);
-	pglBindTexture(GL_TEXTURE_2D, screentexture);
-
-	if (firstTime)
-	{
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-		Clamp2D(GL_TEXTURE_WRAP_S);
-		Clamp2D(GL_TEXTURE_WRAP_T);
-		pglCopyTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 0, 0, texsize, texsize, 0);
+		pglActiveTexture(GL_TEXTURE0);
+		pglClientActiveTexture(GL_TEXTURE0);
+		tex_downloaded = screenTextures[wipeEnd];
 	}
-	else
-		pglCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, texsize, texsize);
-
-	tex_downloaded = screentexture;
 }
 
-EXPORT void HWRAPI(MakeScreenFinalTexture) (void)
+// Create a texture from the screen.
+EXPORT void HWRAPI(MakeScreenTexture) (int tex)
 {
-	INT32 texsize = 2048;
-	boolean firstTime = (finalScreenTexture == 0);
+	INT32 texsize = 512;
+	boolean firstTime = (screenTextures[tex] == 0);
 
-	// Use a power of two texture, dammit
-	if(screen_width <= 512)
-		texsize = 512;
-	else if(screen_width <= 1024)
-		texsize = 1024;
+	// look for power of two that is large enough for the screen
+	while (texsize < screen_width || texsize < screen_height)
+		texsize <<= 1;
 
 	// Create screen texture
 	if (firstTime)
-		pglGenTextures(1, &finalScreenTexture);
-	pglBindTexture(GL_TEXTURE_2D, finalScreenTexture);
+		pglGenTextures(1, &screenTextures[tex]);
+	pglBindTexture(GL_TEXTURE_2D, screenTextures[tex]);
 
 	if (firstTime)
 	{
@@ -3429,24 +3131,23 @@ EXPORT void HWRAPI(MakeScreenFinalTexture) (void)
 	else
 		pglCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, texsize, texsize);
 
-	tex_downloaded = finalScreenTexture;
+	tex_downloaded = screenTextures[tex];
 }
 
-EXPORT void HWRAPI(DrawScreenFinalTexture)(int width, int height)
+EXPORT void HWRAPI(DrawScreenFinalTexture)(int tex, int width, int height)
 {
 	float xfix, yfix;
 	float origaspect, newaspect;
 	float xoff = 1, yoff = 1; // xoffset and yoffset for the polygon to have black bars around the screen
 	FRGBAFloat clearColour;
-	INT32 texsize = 2048;
+	INT32 texsize = 512;
 
 	float off[12];
 	float fix[8];
 
-	if(screen_width <= 1024)
-		texsize = 1024;
-	if(screen_width <= 512)
-		texsize = 512;
+	// look for power of two that is large enough for the screen
+	while (texsize < screen_width || texsize < screen_height)
+		texsize <<= 1;
 
 	xfix = 1/((float)(texsize)/((float)((screen_width))));
 	yfix = 1/((float)(texsize)/((float)((screen_height))));
@@ -3493,7 +3194,8 @@ EXPORT void HWRAPI(DrawScreenFinalTexture)(int width, int height)
 	clearColour.red = clearColour.green = clearColour.blue = 0;
 	clearColour.alpha = 1;
 	ClearBuffer(true, false, &clearColour);
-	pglBindTexture(GL_TEXTURE_2D, finalScreenTexture);
+	SetBlend(PF_NoDepthTest);
+	pglBindTexture(GL_TEXTURE_2D, screenTextures[tex]);
 
 	pglColor4ubv(white);
 
@@ -3501,7 +3203,99 @@ EXPORT void HWRAPI(DrawScreenFinalTexture)(int width, int height)
 	pglVertexPointer(3, GL_FLOAT, 0, off);
 
 	pglDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-	tex_downloaded = finalScreenTexture;
+	tex_downloaded = screenTextures[tex];
+}
+
+// Uploads lut as a 3D texture with HWR_PALETTE_LUT_SIZE texels per side, bound while
+// texture unit 1 is active (the unit the palette_lookup_tex sampler uniform is set to).
+EXPORT void HWRAPI(SetPaletteLookup)(UINT8 *lut)
+{
+	GLenum internalFormat;
+	if (gl_version[0] == '1' || gl_version[0] == '2')
+	{
+		// if the OpenGL version is below 3.0, then the GL_R8 format may not be available.
+		// so use GL_LUMINANCE8 instead to get a single component 8-bit format
+		// (it is possible to have access to shaders even in some OpenGL 1.x systems,
+		// so palette rendering can still possibly be achieved there)
+		internalFormat = GL_LUMINANCE8;
+	}
+	else
+	{
+		internalFormat = GL_R8;
+	}
+	if (!paletteLookupTex)
+		pglGenTextures(1, &paletteLookupTex);
+	pglActiveTexture(GL_TEXTURE1);
+	pglBindTexture(GL_TEXTURE_3D, paletteLookupTex);
+	pglTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	pglTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	pglTexImage3D(GL_TEXTURE_3D, 0, internalFormat, HWR_PALETTE_LUT_SIZE, HWR_PALETTE_LUT_SIZE, HWR_PALETTE_LUT_SIZE,
+		0, GL_RED, GL_UNSIGNED_BYTE, lut);
+	pglActiveTexture(GL_TEXTURE0);
+}
+
+// Creates a 256x32 RGBA texture from hw_lighttable and appends it to the light table
+// list so that ClearLightTables can delete it later.
+// Returns the GL texture id, or 0 if the list item could not be allocated.
+EXPORT UINT32 HWRAPI(CreateLightTable)(RGBA_t *hw_lighttable)
+{
+	LTListItem *item = malloc(sizeof *item);
+	if (!item)
+		return 0; // out of memory: the binding code in this file skips a light table id of 0
+	if (!LightTablesTail)
+	{
+		LightTablesHead = LightTablesTail = item;
+	}
+	else
+	{
+		LightTablesTail->next = item;
+		LightTablesTail = item;
+	}
+	item->next = NULL;
+	pglGenTextures(1, &item->id);
+	pglBindTexture(GL_TEXTURE_2D, item->id);
+	pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	pglTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, hw_lighttable);
+
+	// restore previously bound texture
+	pglBindTexture(GL_TEXTURE_2D, tex_downloaded);
+
+	return item->id;
+}
+
+// Deletes every light table texture and frees the tracking list; ids handed out earlier become invalid and must not be used.
+EXPORT void HWRAPI(ClearLightTables)(void)
+{
+	while (LightTablesHead)
+	{
+		LTListItem *item = LightTablesHead;
+		pglDeleteTextures(1, (GLuint *)&item->id);
+		LightTablesHead = item->next; // advance the head before the node is freed
+		free(item);
+	}
+
+	LightTablesTail = NULL; // the list is empty now, so the tail must not point at a freed node
+
+	// we no longer have a bound light table (if we had one), we just deleted it!
+	lt_downloaded = 0;
+}
+
+// This palette is used for the palette rendering postprocessing step.
+EXPORT void HWRAPI(SetScreenPalette)(RGBA_t *palette)
+{
+	if (memcmp(screenPalette, palette, sizeof(screenPalette))) // re-upload only when the palette differs from the cached copy
+	{
+		memcpy(screenPalette, palette, sizeof(screenPalette));
+		if (!screenPaletteTex)
+			pglGenTextures(1, &screenPaletteTex); // texture object is created on first use
+		pglActiveTexture(GL_TEXTURE2); // unit 2 is the unit the palette_tex sampler uniform is set to
+		pglBindTexture(GL_TEXTURE_1D, screenPaletteTex);
+		pglTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+		pglTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+		pglTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, palette);
+		pglActiveTexture(GL_TEXTURE0); // switch back to unit 0 for the rest of the renderer
+	}
 }
 
 #endif //HWRENDER
diff --git a/src/hardware/r_opengl/r_opengl.h b/src/hardware/r_opengl/r_opengl.h
index f44e0818bbeff0cae3160cd788d67b11587e7961..f7e33c46aa36b9c416a80dc9158c5c7321fb78d0 100644
--- a/src/hardware/r_opengl/r_opengl.h
+++ b/src/hardware/r_opengl/r_opengl.h
@@ -46,6 +46,7 @@
 #define  _CREATE_DLL_  // necessary for Unix AND Windows
 #include "../../doomdef.h"
 #include "../hw_drv.h"
+#include "../../z_zone.h"
 
 // ==========================================================================
 //                                                                DEFINITIONS
diff --git a/src/m_anigif.c b/src/m_anigif.c
index 5bc7717e0f44a4293e52a8907c57b0cfda6eade5..6e6ec68aa49760ddc5ab9ffa025356e0a13ffee3 100644
--- a/src/m_anigif.c
+++ b/src/m_anigif.c
@@ -21,6 +21,7 @@
 #include "i_system.h" // I_GetPreciseTime
 #include "m_misc.h"
 #include "st_stuff.h" // st_palette
+#include "doomstat.h" // singletics
 
 #ifdef HWRENDER
 #include "hardware/hw_main.h"
@@ -604,7 +605,7 @@ static void GIF_framewrite(void)
 		UINT16 delay = 0;
 		INT32 startline;
 
-		if (gif_dynamicdelay ==(UINT8) 2)
+		if (gif_dynamicdelay ==(UINT8) 2 && !singletics)
 		{
 			// golden's attempt at creating a "dynamic delay"
 			UINT16 mingifdelay = 10; // minimum gif delay in milliseconds (keep at 10 because gifs can't get more precise).
@@ -617,7 +618,7 @@ static void GIF_framewrite(void)
 				gif_delayus -= frames*(mingifdelay*1000); // remove frames by the amount of milliseconds they take. don't reset to 0, the microseconds help consistency.
 			}
 		}
-		else if (gif_dynamicdelay ==(UINT8) 1)
+		else if (gif_dynamicdelay ==(UINT8) 1 && !singletics)
 		{
 			float delayf = ceil(100.0f/NEWTICRATE);
 
diff --git a/src/m_menu.c b/src/m_menu.c
index edbbdf2c1587a785479f8333c5ff0a8cbb997806..3c6ef0fe3a145837ef1401603b99d8e321ee1b97 100644
--- a/src/m_menu.c
+++ b/src/m_menu.c
@@ -1406,18 +1406,19 @@ static menuitem_t OP_OpenGLOptionsMenu[] =
 
 	{IT_HEADER, NULL, "General", NULL, 51},
 	{IT_STRING|IT_CVAR,         NULL, "Shaders",             &cv_glshaders,            63},
-	{IT_STRING|IT_CVAR,         NULL, "Lack of perspective", &cv_glshearing,           73},
-	{IT_STRING|IT_CVAR,         NULL, "Field of view",       &cv_fov,                  83},
-
-	{IT_HEADER, NULL, "Miscellaneous", NULL, 102},
-	{IT_STRING|IT_CVAR,         NULL, "Bit depth",           &cv_scr_depth,           114},
-	{IT_STRING|IT_CVAR,         NULL, "Texture filter",      &cv_glfiltermode,        124},
-	{IT_STRING|IT_CVAR,         NULL, "Anisotropic",         &cv_glanisotropicmode,   134},
+	{IT_STRING|IT_CVAR,         NULL, "Palette rendering",   &cv_glpaletterendering,   73},
+	{IT_STRING|IT_CVAR,         NULL, "Lack of perspective", &cv_glshearing,           83},
+	{IT_STRING|IT_CVAR,         NULL, "Field of view",       &cv_fov,                  93},
+
+	{IT_HEADER, NULL, "Miscellaneous", NULL, 112},
+	{IT_STRING|IT_CVAR,         NULL, "Bit depth",           &cv_scr_depth,           124},
+	{IT_STRING|IT_CVAR,         NULL, "Texture filter",      &cv_glfiltermode,        134},
+	{IT_STRING|IT_CVAR,         NULL, "Anisotropic",         &cv_glanisotropicmode,   144},
 #ifdef ALAM_LIGHTING
-	{IT_SUBMENU|IT_STRING,      NULL, "Lighting...",         &OP_OpenGLLightingDef,   144},
+	{IT_SUBMENU|IT_STRING,      NULL, "Lighting...",         &OP_OpenGLLightingDef,   154},
 #endif
 #if defined (_WINDOWS) && (!(defined (__unix__) || defined (UNIXCOMMON) || defined (HAVE_SDL)))
-	{IT_STRING|IT_CVAR,         NULL, "Fullscreen",          &cv_fullscreen,          154},
+	{IT_STRING|IT_CVAR,         NULL, "Fullscreen",          &cv_fullscreen,          164},
 #endif
 };
 
diff --git a/src/m_misc.c b/src/m_misc.c
index 1b6a90c50acd6230cb9d8c56f99df7178926c777..797aca739933ed2cdf26b871e65a5b4b28f191f4 100644
--- a/src/m_misc.c
+++ b/src/m_misc.c
@@ -1254,7 +1254,7 @@ void M_SaveFrame(void)
 	// paranoia: should be unnecessary without singletics
 	static tic_t oldtic = 0;
 
-	if (oldtic == I_GetTime())
+	if (oldtic == I_GetTime() && !singletics)
 		return;
 	else
 		oldtic = I_GetTime();
diff --git a/src/netcode/i_tcp.c b/src/netcode/i_tcp.c
index 96342d5c6c55b901e2ce568aa9a62bfe330b4b3a..2a2f849651f6c9e5ff12ca68c5d153d1cff24dd1 100644
--- a/src/netcode/i_tcp.c
+++ b/src/netcode/i_tcp.c
@@ -1134,10 +1134,10 @@ boolean I_InitTcpDriver(void)
 	{
 		I_AddExitFunc(I_ShutdownTcpDriver);
 #ifdef HAVE_MINIUPNPC
-		if (M_CheckParm("-noUPnP"))
-			UPNP_support = false;
-		else
+		if (M_CheckParm("-useUPnP"))
 			I_InitUPnP();
+		else
+			UPNP_support = false;
 #endif
 	}
 	return init_tcp_driver;
diff --git a/src/p_mobj.c b/src/p_mobj.c
index acee8733f46e1a9169dbdba703ae9ccb45e73656..628280a8d061d4945ca4a346100f63334ba48fa6 100644
--- a/src/p_mobj.c
+++ b/src/p_mobj.c
@@ -2329,9 +2329,9 @@ boolean P_CheckDeathPitCollide(mobj_t *mo)
 	if (mo->player && mo->player->pflags & PF_GODMODE)
 		return false;
 
-	if (((mo->z <= mo->subsector->sector->floorheight
+	if (((mo->z <= mo->floorz
 		&& ((mo->subsector->sector->flags & MSF_TRIGGERSPECIAL_HEADBUMP) || !(mo->eflags & MFE_VERTICALFLIP)) && (mo->subsector->sector->flags & MSF_FLIPSPECIAL_FLOOR))
-	|| (mo->z + mo->height >= mo->subsector->sector->ceilingheight
+	|| (mo->z + mo->height >= mo->ceilingz
 		&& ((mo->subsector->sector->flags & MSF_TRIGGERSPECIAL_HEADBUMP) || (mo->eflags & MFE_VERTICALFLIP)) && (mo->subsector->sector->flags & MSF_FLIPSPECIAL_CEILING)))
 	&& (mo->subsector->sector->damagetype == SD_DEATHPITTILT
 	|| mo->subsector->sector->damagetype == SD_DEATHPITNOTILT))
diff --git a/src/r_data.c b/src/r_data.c
index 0a13d27dbaf55f9e9092adeb0de4635523c96606..56fe9403983ab29e2d60428672e8b4551e715ae4 100644
--- a/src/r_data.c
+++ b/src/r_data.c
@@ -31,6 +31,10 @@
 #include "byteptr.h"
 #include "dehacked.h"
 
+#ifdef HWRENDER
+#include "hardware/hw_glob.h" // HWR_ClearLightTables
+#endif
+
 //
 // Graphics.
 // SRB2 graphics for walls and sprites
@@ -426,6 +430,9 @@ void R_ClearColormaps(void)
 {
 	// Purged by PU_LEVEL, just overwrite the pointer
 	extra_colormaps = R_CreateDefaultColormap(true);
+#ifdef HWRENDER
+	HWR_ClearLightTables();
+#endif
 }
 
 //
diff --git a/src/r_defs.h b/src/r_defs.h
index d556b540f8a68794986e04da72c58e6d7af8f9a2..65fd883c91a9ccb7b02fbcfa7864d1783ca987ef 100644
--- a/src/r_defs.h
+++ b/src/r_defs.h
@@ -25,6 +25,10 @@
 
 #include "screen.h" // MAXVIDWIDTH, MAXVIDHEIGHT
 
+#ifdef HWRENDER
+#include "m_aatree.h"
+#endif
+
 #include "taglist.h"
 
 //
@@ -69,6 +73,11 @@ typedef struct extracolormap_s
 
 	lighttable_t *colormap;
 
+#ifdef HWRENDER
+	// The id of the hardware lighttable. Zero means it does not exist yet.
+	UINT32 gl_lighttable_id;
+#endif
+
 #ifdef EXTRACOLORMAPLUMPS
 	lumpnum_t lump; // for colormap lump matching, init to LUMPERROR
 	char lumpname[9]; // for netsyncing
@@ -897,6 +906,26 @@ typedef struct
 	// the [0] is &columnofs[width]
 } ATTRPACK softwarepatch_t;
 
+#ifdef _MSC_VER
+#pragma warning(disable :  4200)
+#endif
+
+// a pic is an unmasked block of pixels, stored in horizontal way
+typedef struct
+{
+	INT16 width;
+	UINT8 zero;       // set to 0 allow autodetection of pic_t
+	                 // mode instead of patch or raw
+	UINT8 mode;       // see pic_mode_t above
+	INT16 height;
+	INT16 reserved1; // set to 0
+	UINT8 data[0];
+} ATTRPACK pic_t;
+
+#ifdef _MSC_VER
+#pragma warning(default : 4200)
+#endif
+
 #if defined(_MSC_VER)
 #pragma pack()
 #endif
diff --git a/src/r_textures.c b/src/r_textures.c
index 0175a080e7cbd901bee865429660ddb3de51af9f..59cc114139c5abb24df0f47fa0dddd0e70ec8d72 100644
--- a/src/r_textures.c
+++ b/src/r_textures.c
@@ -193,6 +193,8 @@ static void R_DrawBlendColumnInCache(column_t *column, UINT8 *cache, texpatch_t
 		{
 			for (; dest < cache + position + count; source++, dest++, is_opaque++)
 			{
+				if (originPatch->alpha <= ASTTextureBlendingThreshold[1] && !(*is_opaque))
+					continue;
 				*dest = ASTBlendPaletteIndexes(*dest, *source, originPatch->style, originPatch->alpha);
 				*is_opaque = true;
 			}
@@ -237,6 +239,8 @@ static void R_DrawBlendFlippedColumnInCache(column_t *column, UINT8 *cache, texp
 		{
 			for (; dest < cache + position + count; --source, dest++, is_opaque++)
 			{
+				if (originPatch->alpha <= ASTTextureBlendingThreshold[1] && !(*is_opaque))
+					continue;
 				*dest = ASTBlendPaletteIndexes(*dest, *source, originPatch->style, originPatch->alpha);
 				*is_opaque = true;
 			}
diff --git a/src/sdl/Srb2SDL-vc10.vcxproj b/src/sdl/Srb2SDL-vc10.vcxproj
index 0f0355c6f28ad2afbd6d201f7fba46897e73f1f3..5e30970286724f98e6a220ff658207be0119dd84 100644
--- a/src/sdl/Srb2SDL-vc10.vcxproj
+++ b/src/sdl/Srb2SDL-vc10.vcxproj
@@ -495,6 +495,7 @@
     <ClCompile Include="..\hardware\hw_md2load.c" />
     <ClCompile Include="..\hardware\hw_md3load.c" />
     <ClCompile Include="..\hardware\hw_model.c" />
+    <ClCompile Include="..\hardware\hw_shaders.c" />
     <ClCompile Include="..\hardware\r_opengl\r_opengl.c" />
     <ClCompile Include="..\lua_colorlib.c" />
     <ClCompile Include="..\r_bbox.c" />
diff --git a/src/sdl/Srb2SDL-vc10.vcxproj.filters b/src/sdl/Srb2SDL-vc10.vcxproj.filters
index 183843018f84087b8555fbce59e638e67cfa3546..35d47fad1fb38f008138348d77d6c242b7e71ccf 100644
--- a/src/sdl/Srb2SDL-vc10.vcxproj.filters
+++ b/src/sdl/Srb2SDL-vc10.vcxproj.filters
@@ -751,6 +751,12 @@
     <ClCompile Include="..\hardware\hw_model.c">
       <Filter>Hw_Hardware</Filter>
     </ClCompile>
+    <ClCompile Include="..\hardware\hw_shaders.c">
+      <Filter>Hw_Hardware</Filter>
+    </ClCompile>
+    <ClCompile Include="..\hardware\u_list.c">
+      <Filter>Hw_Hardware</Filter>
+    </ClCompile>
     <ClCompile Include="..\filesrch.c">
       <Filter>I_Interface</Filter>
     </ClCompile>
diff --git a/src/sdl/hwsym_sdl.c b/src/sdl/hwsym_sdl.c
index 96e3d7d6926ef23771c8dcf489b4d8d2a16c0a1c..ca87fcc7951758e9a5de7ca1a405f88b1f202204 100644
--- a/src/sdl/hwsym_sdl.c
+++ b/src/sdl/hwsym_sdl.c
@@ -74,7 +74,7 @@ void *hwSym(const char *funcName,void *handle)
 {
 	void *funcPointer = NULL;
 #ifdef HWRENDER
-	if (0 == strcmp("SetPalette", funcName))
+	if (0 == strcmp("SetTexturePalette", funcName))
 		funcPointer = &OglSdlSetPalette;
 
 	GETFUNC(Init);
@@ -87,7 +87,7 @@ void *hwSym(const char *funcName,void *handle)
 	GETFUNC(SetTexture);
 	GETFUNC(UpdateTexture);
 	GETFUNC(DeleteTexture);
-	GETFUNC(ReadRect);
+	GETFUNC(ReadScreenTexture);
 	GETFUNC(GClipRect);
 	GETFUNC(ClearMipMapCache);
 	GETFUNC(SetSpecialState);
@@ -97,21 +97,23 @@ void *hwSym(const char *funcName,void *handle)
 	GETFUNC(SetTransform);
 	GETFUNC(PostImgRedraw);
 	GETFUNC(FlushScreenTextures);
-	GETFUNC(StartScreenWipe);
-	GETFUNC(EndScreenWipe);
 	GETFUNC(DoScreenWipe);
-	GETFUNC(DrawIntermissionBG);
+	GETFUNC(DrawScreenTexture);
 	GETFUNC(MakeScreenTexture);
-	GETFUNC(MakeScreenFinalTexture);
 	GETFUNC(DrawScreenFinalTexture);
 
-	GETFUNC(CompileShaders);
-	GETFUNC(CleanShaders);
+	GETFUNC(InitShaders);
+	GETFUNC(LoadShader);
+	GETFUNC(CompileShader);
 	GETFUNC(SetShader);
 	GETFUNC(UnSetShader);
 
 	GETFUNC(SetShaderInfo);
-	GETFUNC(LoadCustomShader);
+
+	GETFUNC(SetPaletteLookup);
+	GETFUNC(CreateLightTable);
+	GETFUNC(ClearLightTables);
+	GETFUNC(SetScreenPalette);
 
 #else //HWRENDER
 	if (0 == strcmp("FinishUpdate", funcName))
diff --git a/src/sdl/i_video.c b/src/sdl/i_video.c
index d3a602c05803fcd5d6aced670321aad5c723ea74..1005af9d91b41fef8789b1c44cdff72dc3c9349d 100644
--- a/src/sdl/i_video.c
+++ b/src/sdl/i_video.c
@@ -1297,6 +1297,14 @@ void I_FinishUpdate(void)
 #ifdef HWRENDER
 	else if (rendermode == render_opengl)
 	{
+		// Final postprocess step of palette rendering, after everything else has been drawn.
+		if (HWR_ShouldUsePaletteRendering())
+		{
+			HWD.pfnMakeScreenTexture(HWD_SCREENTEXTURE_GENERIC2);
+			HWD.pfnSetShader(HWR_GetShaderFromTarget(SHADER_PALETTE_POSTPROCESS));
+			HWD.pfnDrawScreenTexture(HWD_SCREENTEXTURE_GENERIC2, NULL, 0);
+			HWD.pfnUnSetShader();
+		}
 		OglSdlFinishUpdate(cv_vidwait.value);
 	}
 #endif
@@ -1958,32 +1966,34 @@ void VID_StartupOpenGL(void)
 		HWD.pfnSetTexture       = hwSym("SetTexture",NULL);
 		HWD.pfnUpdateTexture    = hwSym("UpdateTexture",NULL);
 		HWD.pfnDeleteTexture    = hwSym("DeleteTexture",NULL);
-		HWD.pfnReadRect         = hwSym("ReadRect",NULL);
+		HWD.pfnReadScreenTexture= hwSym("ReadScreenTexture",NULL);
 		HWD.pfnGClipRect        = hwSym("GClipRect",NULL);
 		HWD.pfnClearMipMapCache = hwSym("ClearMipMapCache",NULL);
 		HWD.pfnSetSpecialState  = hwSym("SetSpecialState",NULL);
-		HWD.pfnSetPalette       = hwSym("SetPalette",NULL);
+		HWD.pfnSetTexturePalette= hwSym("SetTexturePalette",NULL);
 		HWD.pfnGetTextureUsed   = hwSym("GetTextureUsed",NULL);
 		HWD.pfnDrawModel        = hwSym("DrawModel",NULL);
 		HWD.pfnCreateModelVBOs  = hwSym("CreateModelVBOs",NULL);
 		HWD.pfnSetTransform     = hwSym("SetTransform",NULL);
 		HWD.pfnPostImgRedraw    = hwSym("PostImgRedraw",NULL);
 		HWD.pfnFlushScreenTextures=hwSym("FlushScreenTextures",NULL);
-		HWD.pfnStartScreenWipe  = hwSym("StartScreenWipe",NULL);
-		HWD.pfnEndScreenWipe    = hwSym("EndScreenWipe",NULL);
 		HWD.pfnDoScreenWipe     = hwSym("DoScreenWipe",NULL);
-		HWD.pfnDrawIntermissionBG=hwSym("DrawIntermissionBG",NULL);
+		HWD.pfnDrawScreenTexture= hwSym("DrawScreenTexture",NULL);
 		HWD.pfnMakeScreenTexture= hwSym("MakeScreenTexture",NULL);
-		HWD.pfnMakeScreenFinalTexture=hwSym("MakeScreenFinalTexture",NULL);
 		HWD.pfnDrawScreenFinalTexture=hwSym("DrawScreenFinalTexture",NULL);
 
-		HWD.pfnCompileShaders   = hwSym("CompileShaders",NULL);
-		HWD.pfnCleanShaders     = hwSym("CleanShaders",NULL);
+		HWD.pfnInitShaders      = hwSym("InitShaders",NULL);
+		HWD.pfnLoadShader       = hwSym("LoadShader",NULL);
+		HWD.pfnCompileShader    = hwSym("CompileShader",NULL);
 		HWD.pfnSetShader        = hwSym("SetShader",NULL);
 		HWD.pfnUnSetShader      = hwSym("UnSetShader",NULL);
 
 		HWD.pfnSetShaderInfo    = hwSym("SetShaderInfo",NULL);
-		HWD.pfnLoadCustomShader = hwSym("LoadCustomShader",NULL);
+
+		HWD.pfnSetPaletteLookup = hwSym("SetPaletteLookup",NULL);
+		HWD.pfnCreateLightTable = hwSym("CreateLightTable",NULL);
+		HWD.pfnClearLightTables = hwSym("ClearLightTables",NULL);
+		HWD.pfnSetScreenPalette = hwSym("SetScreenPalette",NULL);
 
 		vid.glstate = HWD.pfnInit() ? VID_GL_LIBRARY_LOADED : VID_GL_LIBRARY_ERROR; // let load the OpenGL library
 
diff --git a/src/sdl/ogl_sdl.c b/src/sdl/ogl_sdl.c
index db0538195bbb6d98badb0fc27c5b0a9fac34fa81..e7347547e224b43f2fcdf66e527c8adc2681e243 100644
--- a/src/sdl/ogl_sdl.c
+++ b/src/sdl/ogl_sdl.c
@@ -232,7 +232,9 @@ void OglSdlFinishUpdate(boolean waitvbl)
 
 	// Sryder:	We need to draw the final screen texture again into the other buffer in the original position so that
 	//			effects that want to take the old screen can do so after this
-	HWR_DrawScreenFinalTexture(realwidth, realheight);
+	// Generic2 has the screen image without palette rendering brightness adjustments.
+	// Using that here will prevent brightness adjustments being applied twice.
+	DrawScreenTexture(HWD_SCREENTEXTURE_GENERIC2, NULL, 0);
 }
 
 EXPORT void HWRAPI(OglSdlSetPalette) (RGBA_t *palette)
diff --git a/src/st_stuff.c b/src/st_stuff.c
index be676cff46d08541b56577b2cfdd4fd9f0971247..3d730fb4f099145cb8a62c1e7544b5cf6d705978 100644
--- a/src/st_stuff.c
+++ b/src/st_stuff.c
@@ -227,8 +227,8 @@ void ST_doPaletteStuff(void)
 		palette = 0;
 
 #ifdef HWRENDER
-	if (rendermode == render_opengl)
-		palette = 0; // No flashpals here in OpenGL
+	if (rendermode == render_opengl && !HWR_ShouldUsePaletteRendering())
+		palette = 0; // Don't set the palette to a flashpal in OpenGL's truecolor mode
 #endif
 
 	if (palette != st_palette)
@@ -2893,7 +2893,7 @@ void ST_Drawer(void)
 	//25/08/99: Hurdler: palette changes is done for all players,
 	//                   not only player1! That's why this part
 	//                   of code is moved somewhere else.
-	if (rendermode == render_soft)
+	if (rendermode == render_soft || HWR_ShouldUsePaletteRendering())
 #endif
 		if (rendermode != render_none) ST_doPaletteStuff();
 
diff --git a/src/w_wad.c b/src/w_wad.c
index 3a50646930e984762ffb720f2c2022edac075e33..dbf9d1ad8646828952bf5a4310a1db05ac402700 100644
--- a/src/w_wad.c
+++ b/src/w_wad.c
@@ -821,10 +821,7 @@ static void W_ReadFileShaders(wadfile_t *wadfile)
 {
 #ifdef HWRENDER
 	if (rendermode == render_opengl && (vid.glstate == VID_GL_LIBRARY_LOADED))
-	{
 		HWR_LoadCustomShadersFromFile(numwadfiles - 1, W_FileHasFolders(wadfile));
-		HWR_CompileShaders();
-	}
 #else
 	(void)wadfile;
 #endif