diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 898fcc78948ca85f6f89254718f1c157c218e024..0f7e9c479a7412282ffaf3df897f8a2d2dfe738b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -225,6 +225,7 @@ target_compile_definitions(SRB2SDL2 PRIVATE -DHAVE_DISCORDRPC -DUSE_STUN)
 target_sources(SRB2SDL2 PRIVATE discord.c stun.c)
 target_link_libraries(SRB2SDL2 PRIVATE tcbrindle::span)
+target_link_libraries(SRB2SDL2 PRIVATE stb_rect_pack)
 target_link_libraries(SRB2SDL2 PRIVATE stb_vorbis)
 target_link_libraries(SRB2SDL2 PRIVATE xmp-lite::xmp-lite)
 target_link_libraries(SRB2SDL2 PRIVATE glad::glad)
diff --git a/src/cxxutil.hpp b/src/cxxutil.hpp
index 06f6f1adc25e8916f0511bce4d93f30e12d5d452..56b85c79b476d469746c46e5f3419556b0d6dd60 100644
--- a/src/cxxutil.hpp
+++ b/src/cxxutil.hpp
@@ -162,6 +162,17 @@ struct Overload : Ts... {
 template <typename... Ts>
 Overload(Ts...) -> Overload<Ts...>;
+/// @brief Terminal overload of hash_combine: with no values left to mix in, the seed is left untouched.
+/// @param seed the running hash value
+inline void hash_combine(std::size_t& seed)
+{
+}
+
+/// @brief Fold the std::hash of each value into seed, left to right.
+/// @param seed the running hash value, updated in place
+/// @param v    the next value to mix in; std::hash<T> must be usable for it
+/// @param rest the remaining values, mixed in after v in argument order
+template <class T, typename... Rest>
+inline void hash_combine(std::size_t& seed, const T& v, const Rest&... rest)
+{
+	std::hash<T> hasher;
+	seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+	// The tail is taken by const reference so that each recursion step neither copies the
+	// remaining arguments nor requires them to be copyable.
+	hash_combine(seed, rest...);
+}
 } // namespace srb2
 #endif // __SRB2_CXXUTIL_HPP__
diff --git a/src/f_finale.h b/src/f_finale.h
index ca110821431c66d6a839e4a44d79946b077a5993..e2d4599a262318942c56a12a1a7957bf39da9bb8 100644
--- a/src/f_finale.h
+++ b/src/f_finale.h
@@ -140,6 +140,10 @@ extern UINT16 curtttics;
 extern boolean WipeInAction;
+// Wipe state written by F_RunWipe (f_wipe.c) while a wipe is running.
+// Wipe type passed to F_RunWipe.
+extern UINT8 g_wipetype;
+// Set to F_RunWipe's local `wipeframe - 1` on each wipe step.
+extern UINT8 g_wipeframe;
+// The `reverse` argument of the running wipe.
+extern boolean g_wipereverse;
+// Set once a wipe frame has been presented and cleared again when the wipe starts and ends.
+extern boolean g_wipeskiprender;
 extern boolean WipeStageTitle;
 extern INT32 lastwipetic;
diff --git a/src/f_wipe.c b/src/f_wipe.c
index b15c4b171ee0ece7a7f2f10d17b4017000fcca0b..98097b32c201e9162dde6f5b78e23d9fb3289849 100644
--- a/src/f_wipe.c
+++ b/src/f_wipe.c
@@ -90,6 +90,10 @@ UINT8 wipedefs[NUMWIPEDEFS] = {
 boolean WipeInAction = false;
+// Wipe state published by F_RunWipe; the first three are only written when rendermode != render_none.
+// Wipe type passed to F_RunWipe.
+UINT8 g_wipetype = 0;
+// Set to F_RunWipe's local `wipeframe - 1` on each wipe step.
+UINT8 g_wipeframe = 0;
+// The `reverse` argument of the running wipe.
+boolean g_wipereverse = false;
+// Set after I_FinishUpdate inside the wipe loop; reset to false when the wipe starts and when it ends.
+boolean g_wipeskiprender = false;
 boolean WipeStageTitle = false;
 INT32 lastwipetic = 0;
@@ -189,152 +193,6 @@ static fademask_t *F_GetFadeMask(UINT8 masknum, UINT8 scrnnum) {
 	return NULL;
-/**	Wipe ticker
-  *
-  * \param	fademask	pixels to change
-  */
-static void F_DoWipe(fademask_t *fademask, lighttable_t *fadecolormap, boolean reverse)
-	// Software mask wipe -- optimized; though it might not look like it!
-	// Okay, to save you wondering *how* this is more optimized than the simpler
-	// version that came before it...
-	// ---
-	// The previous code did two FixedMul calls for every single pixel on the
-	// screen, of which there are hundreds of thousands -- if not millions -- of.
-	// This worked fine for smaller screen sizes, but with excessively large
-	// (1920x1200) screens that meant 4 million+ calls out to FixedMul, and that
-	// would take /just/ long enough that fades would start to noticably lag.
-	// ---
-	// This code iterates over the fade mask's pixels instead of the screen's,
-	// and deals with drawing over each rectangular area before it moves on to
-	// the next pixel in the fade mask.  As a result, it's more complex (and might
-	// look a little messy; sorry!) but it simultaneously runs at twice the speed.
-	// In addition, we precalculate all the X and Y positions that we need to draw
-	// from and to, so it uses a little extra memory, but again, helps it run faster.
-	// ---
-	// Sal: I kinda destroyed some of this code by introducing Genesis-style fades.
-	// A colormap can be provided in F_RunWipe, which the white/black values will be
-	// remapped to the appropriate entry in the fade colormap.
-	{
-		// wipe screen, start, end
-		UINT8       *w = wipe_scr;
-		const UINT8 *s = wipe_scr_start;
-		const UINT8 *e = wipe_scr_end;
-		// first pixel for each screen
-		UINT8       *w_base = w;
-		const UINT8 *s_base = s;
-		const UINT8 *e_base = e;
-		// mask data, end
-		UINT8       *transtbl;
-		const UINT8 *mask    = fademask->mask;
-		const UINT8 *maskend = mask + fademask->size;
-		// rectangle draw hints
-		UINT32 draw_linestart, draw_rowstart;
-		UINT32 draw_lineend,   draw_rowend;
-		UINT32 draw_linestogo, draw_rowstogo;
-		// rectangle coordinates, etc.
-		UINT16* scrxpos = (UINT16*)malloc((fademask->width + 1)  * sizeof(UINT16));
-		UINT16* scrypos = (UINT16*)malloc((fademask->height + 1) * sizeof(UINT16));
-		UINT16 maskx, masky;
-		UINT32 relativepos;
-		// ---
-		// Screw it, we do the fixed point math ourselves up front.
-		scrxpos[0] = 0;
-		for (relativepos = 0, maskx = 1; maskx < fademask->width; ++maskx)
-			scrxpos[maskx] = (relativepos += fademask->xscale)>>FRACBITS;
-		scrxpos[fademask->width] = vid.width;
-		scrypos[0] = 0;
-		for (relativepos = 0, masky = 1; masky < fademask->height; ++masky)
-			scrypos[masky] = (relativepos += fademask->yscale)>>FRACBITS;
-		scrypos[fademask->height] = vid.height;
-		// ---
-		maskx = masky = 0;
-		do
-		{
-			UINT8 m = *mask;
-			draw_rowstart = scrxpos[maskx];
-			draw_rowend   = scrxpos[maskx + 1];
-			draw_linestart = scrypos[masky];
-			draw_lineend   = scrypos[masky + 1];
-			relativepos = (draw_linestart * vid.width) + draw_rowstart;
-			draw_linestogo = draw_lineend - draw_linestart;
-			if (reverse)
-				m = ((pallen-1) - m);
-			if (m == 0)
-			{
-				// shortcut - memcpy source to work
-				while (draw_linestogo--)
-				{
-					M_Memcpy(w_base+relativepos, (reverse ? e_base : s_base)+relativepos, draw_rowend-draw_rowstart);
-					relativepos += vid.width;
-				}
-			}
-			else if (m >= (pallen-1))
-			{
-				// shortcut - memcpy target to work
-				while (draw_linestogo--)
-				{
-					M_Memcpy(w_base+relativepos, (reverse ? s_base : e_base)+relativepos, draw_rowend-draw_rowstart);
-					relativepos += vid.width;
-				}
-			}
-			else
-			{
-				// pointer to transtable that this mask would use
-				transtbl = transtables + ((9 - m)<<FF_TRANSSHIFT);
-				while (draw_linestogo--)
-				{
-					w = w_base + relativepos;
-					s = s_base + relativepos;
-					e = e_base + relativepos;
-					draw_rowstogo = draw_rowend - draw_rowstart;
-					if (fadecolormap)
-					{
-						if (reverse)
-							s = e;
-						while (draw_rowstogo--)
-							*w++ = fadecolormap[ ( m << 8 ) + *s++ ];
-					}
-					else while (draw_rowstogo--)
-					{
-						/*if (fadecolormap != NULL)
-						{
-							if (reverse)
-								*w++ = fadecolormap[ ( m << 8 ) + *e++ ];
-							else
-								*w++ = fadecolormap[ ( m << 8 ) + *s++ ];
-						}
-						else*/
-							*w++ = transtbl[ ( *e++ << 8 ) + *s++ ];
-					}
-					relativepos += vid.width;
-				}
-			}
-			if (++maskx >= fademask->width)
-				++masky, maskx = 0;
-		} while (++mask < maskend);
-		free(scrxpos);
-		free(scrypos);
-	}
 /** Save the "before" screen of a wipe.
@@ -467,6 +325,7 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu, const char *colormap, boolean r
 	// Init the wipe
 	WipeInAction = true;
+	g_wipeskiprender = false;
 	wipe_scr = screens[0];
 	// lastwipetic should either be 0 or the tic we last wiped
@@ -494,7 +353,10 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu, const char *colormap, boolean r
 		if (rendermode != render_none) //this allows F_RunWipe to be called in dedicated servers
-			F_DoWipe(fmask, fcolor, reverse);
+			// The software wipe (the removed F_DoWipe) is no longer drawn here; only the wipe state is recorded in the g_wipe* globals.
+			g_wipetype = wipetype;
+			g_wipeframe = wipeframe - 1;
+			g_wipereverse = reverse;
 			if (encorewiggle)
@@ -521,6 +383,12 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu, const char *colormap, boolean r
 		I_FinishUpdate(); // page flip or blit buffer
+		if (rendermode != render_none)
+		{
+			// Skip subsequent renders until the end of the wipe to preserve the current frame.
+			g_wipeskiprender = true;
+		}
 		if (moviemode)
@@ -528,6 +396,7 @@ void F_RunWipe(UINT8 wipetype, boolean drawMenu, const char *colormap, boolean r
 	WipeInAction = false;
+	g_wipeskiprender = false;
 	if (fcolor)
diff --git a/src/hwr2/CMakeLists.txt b/src/hwr2/CMakeLists.txt
index 50481810520ac9e551709d52c213566ca7b9d6d4..34aa2186fb945c7232800cd29d82cd3b4320d086 100644
--- a/src/hwr2/CMakeLists.txt
+++ b/src/hwr2/CMakeLists.txt
@@ -1,8 +1,20 @@
 target_sources(SRB2SDL2 PRIVATE
+	pass_blit_rect.cpp
+	pass_blit_rect.hpp
+	pass_manager.cpp
+	pass_manager.hpp
+	pass_postprocess.cpp
+	pass_postprocess.hpp
+	pass_resource_managers.cpp
+	pass_resource_managers.hpp
+	pass_twodee.cpp
+	pass_twodee.hpp
+	twodee.cpp
+	twodee.hpp
diff --git a/src/hwr2/pass.cpp b/src/hwr2/pass.cpp
index 48b331492882cb49dbce16133c6a0fdeb35c5924..d20be7294f4accf537dee33bb21e83e3526291d5 100644
--- a/src/hwr2/pass.cpp
+++ b/src/hwr2/pass.cpp
@@ -1,3 +1,15 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "pass.hpp"
-srb2::hwr2::Pass::~Pass() = default;
+using namespace srb2;
+using namespace srb2::hwr2;
+Pass::~Pass() = default;
diff --git a/src/hwr2/pass.hpp b/src/hwr2/pass.hpp
index 2556bf8f855b7740d299102f541562520291c030..a745bd12bf2dd2d75808e70fcc179b12cefd191b 100644
--- a/src/hwr2/pass.hpp
+++ b/src/hwr2/pass.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_HWR2_PASS_HPP__
 #define __SRB2_HWR2_PASS_HPP__
@@ -8,7 +17,9 @@ namespace srb2::hwr2
 /// @brief A rendering pass which performs logic during each phase of a frame render.
 /// During rendering, all registered Pass's individual stages will be run together.
-struct Pass {
+class Pass
 	virtual ~Pass();
 	/// @brief Perform rendering logic and create necessary GPU resources.
diff --git a/src/hwr2/pass_blit_rect.cpp b/src/hwr2/pass_blit_rect.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..56fb4f6dce900109aeb3ba323fcbc8c45b6a333d
--- /dev/null
+++ b/src/hwr2/pass_blit_rect.cpp
@@ -0,0 +1,209 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass_blit_rect.hpp"
+#include <optional>
+#include <tcb/span.hpp>
+#include "../cxxutil.hpp"
+using namespace srb2;
+using namespace srb2::hwr2;
+using namespace srb2::rhi;
+/// @brief One vertex of the blit quad: a position (x, y, z) followed by a texture coordinate (u, v).
+/// The pipeline descriptions in this file bind the position at byte offset 0 and the texcoord at byte offset 12.
+struct BlitVertex
+	float x = 0.f;
+	float y = 0.f;
+	float z = 0.f;
+	float u = 0.f;
+	float v = 0.f;
+} // namespace
+/// Quad centered on the origin with corners at +/-0.5 and texture coordinates spanning 0..1.
+static const BlitVertex kVerts[] =
+	{{-.5f, -.5f, 0.f, 0.f, 0.f}, {.5f, -.5f, 0.f, 1.f, 0.f}, {-.5f, .5f, 0.f, 0.f, 1.f}, {.5f, .5f, 0.f, 1.f, 1.f}};
+/// Indices into kVerts forming the quad's two triangles: (0, 1, 2) and (1, 3, 2).
+static const uint16_t kIndices[] = {0, 1, 2, 1, 3, 2};
+/// @brief Pipeline used for paletted source textures. Requires the texture and the palette texture.
+/// Vertices use the BlitVertex layout (position at byte offset 0, texcoord at byte offset 12). The depth
+/// attachment and blend slots are both std::nullopt, and all four color channels are written.
+static const PipelineDesc kPalettedPipelineDescription = {
+	PipelineProgram::kUnshadedPaletted,
+	{{{sizeof(BlitVertex)}}, {{VertexAttributeName::kPosition, 0, 0}, {VertexAttributeName::kTexCoord0, 0, 12}}},
+	// Uniform group 0: projection. Uniform group 1: model-view and texcoord transform.
+	{{{{UniformName::kProjection}}, {{UniformName::kModelView, UniformName::kTexCoord0Transform}}}},
+	{{// R8 index texture
+	  SamplerName::kSampler0,
+	  // 256x1 palette texture
+	  SamplerName::kSampler1}},
+	std::nullopt,
+	{PixelFormat::kRGBA8, std::nullopt, {true, true, true, true}},
+	PrimitiveType::kTriangles,
+	CullMode::kNone,
+	FaceWinding::kCounterClockwise,
+	{0.f, 0.f, 0.f, 1.f}};
+/// @brief Pipeline used for non-paletted source textures.
+/// Identical to the paletted description except for the program and the single sampler.
+static const PipelineDesc kUnshadedPipelineDescription = {
+	PipelineProgram::kUnshaded,
+	{{{sizeof(BlitVertex)}}, {{VertexAttributeName::kPosition, 0, 0}, {VertexAttributeName::kTexCoord0, 0, 12}}},
+	// Uniform group 0: projection. Uniform group 1: model-view and texcoord transform.
+	{{{{UniformName::kProjection}}, {{UniformName::kModelView, UniformName::kTexCoord0Transform}}}},
+	{{// RGB/A texture
+	  SamplerName::kSampler0}},
+	std::nullopt,
+	{PixelFormat::kRGBA8, std::nullopt, {true, true, true, true}},
+	PrimitiveType::kTriangles,
+	CullMode::kNone,
+	FaceWinding::kCounterClockwise,
+	{0.f, 0.f, 0.f, 1.f}};
+/// Construct a non-paletted blit pass; every member keeps its in-class default.
+BlitRectPass::BlitRectPass() : Pass()
+/// Construct a non-paletted blit pass.
+/// @param output_clear stored in output_clear_; selects the clear vs. load behavior used when rendering
+BlitRectPass::BlitRectPass(bool output_clear) : Pass(), output_clear_(output_clear)
+/// Construct a blit pass with a palette manager; prepass picks the paletted pipeline when the manager is non-null.
+/// @param palette_mgr  palette manager whose palette texture is bound as the second sampler
+/// @param output_clear stored in output_clear_; selects the clear vs. load behavior used when rendering
+BlitRectPass::BlitRectPass(const std::shared_ptr<MainPaletteManager>& palette_mgr, bool output_clear)
+	: Pass(), output_clear_(output_clear), palette_mgr_(palette_mgr)
+BlitRectPass::~BlitRectPass() = default;
+/// @brief Create the GPU objects this pass needs; each one is created only while its handle still tests false.
+/// @param rhi rendering interface used to create the pipeline, buffers and render pass
+void BlitRectPass::prepass(Rhi& rhi)
+	if (!pipeline_)
+	{
+		// A palette manager means the source texture is paletted, which needs the paletted program.
+		if (palette_mgr_)
+		{
+			pipeline_ = rhi.create_pipeline(kPalettedPipelineDescription);
+		}
+		else
+		{
+			pipeline_ = rhi.create_pipeline(kUnshadedPipelineDescription);
+		}
+	}
+	if (!quad_vbo_)
+	{
+		quad_vbo_ = rhi.create_buffer({sizeof(kVerts), BufferType::kVertexBuffer, BufferUsage::kImmutable});
+		// The contents are filled in later, by transfer().
+		quad_vbo_needs_upload_ = true;
+	}
+	if (!quad_ibo_)
+	{
+		quad_ibo_ = rhi.create_buffer({sizeof(kIndices), BufferType::kIndexBuffer, BufferUsage::kImmutable});
+		// The contents are filled in later, by transfer().
+		quad_ibo_needs_upload_ = true;
+	}
+	if (!render_pass_)
+	{
+		// output_clear_ is sampled once here: the render pass is not recreated if the flag changes afterwards.
+		render_pass_ = rhi.create_render_pass(
+			{std::nullopt,
+			 PixelFormat::kRGBA8,
+			 output_clear_ ? AttachmentLoadOp::kClear : AttachmentLoadOp::kLoad,
+			 AttachmentStoreOp::kStore}
+		);
+	}
+/// @brief Upload the quad geometry if it is still pending, then build the uniform sets and the binding set
+/// from the currently configured source texture and output.
+/// @param rhi rendering interface
+/// @param ctx transfer context the uploads and set creations are recorded against
+void BlitRectPass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+	if (quad_vbo_needs_upload_ && quad_vbo_)
+	{
+		rhi.update_buffer_contents(ctx, quad_vbo_, 0, tcb::as_bytes(tcb::span(kVerts)));
+		quad_vbo_needs_upload_ = false;
+	}
+	if (quad_ibo_needs_upload_ && quad_ibo_)
+	{
+		rhi.update_buffer_contents(ctx, quad_ibo_, 0, tcb::as_bytes(tcb::span(kIndices)));
+		quad_ibo_needs_upload_ = false;
+	}
+	// Both ratios stay at 1.0 unless set_output() asked for aspect correction.
+	float aspect = 1.0;
+	float output_aspect = 1.0;
+	if (output_correct_aspect_)
+	{
+		// NOTE(review): a zero texture_height_ or output_height_ makes these divisions yield inf/NaN --
+		// confirm callers always set non-zero sizes before requesting aspect correction.
+		aspect = static_cast<float>(texture_width_) / static_cast<float>(texture_height_);
+		output_aspect = static_cast<float>(output_width_) / static_cast<float>(output_height_);
+	}
+	// Despite the name, this is true when the source's width/height ratio exceeds the output's.
+	bool taller = aspect > output_aspect;
+	std::array<rhi::UniformVariant, 1> g1_uniforms = {{
+		// Projection
+		std::array<std::array<float, 4>, 4> {
+			{{taller ? 1.f : 1.f / output_aspect, 0.f, 0.f, 0.f},
+			 {0.f, taller ? -1.f / (1.f / output_aspect) : -1.f, 0.f, 0.f},
+			 {0.f, 0.f, 1.f, 0.f},
+			 {0.f, 0.f, 0.f, 1.f}}},
+	}};
+	std::array<rhi::UniformVariant, 2> g2_uniforms = {
+		{// ModelView
+		 std::array<std::array<float, 4>, 4> {
+			 {{taller ? 2.f : 2.f * aspect, 0.f, 0.f, 0.f},
+			  {0.f, taller ? 2.f * (1.f / aspect) : 2.f, 0.f, 0.f},
+			  {0.f, 0.f, 1.f, 0.f},
+			  {0.f, 0.f, 0.f, 1.f}}},
+		 // Texcoord0 Transform (output_flip_ negates the V scale)
+		 std::array<std::array<float, 3>, 3> {
+			 {{1.f, 0.f, 0.f}, {0.f, output_flip_ ? -1.f : 1.f, 0.f}, {0.f, 0.f, 1.f}}}}};
+	// New uniform sets and a new binding set are created on every call; the previous handles are overwritten.
+	uniform_sets_[0] = rhi.create_uniform_set(ctx, {g1_uniforms});
+	uniform_sets_[1] = rhi.create_uniform_set(ctx, {g2_uniforms});
+	std::array<rhi::VertexAttributeBufferBinding, 1> vbs = {{{0, quad_vbo_}}};
+	if (palette_mgr_)
+	{
+		// Paletted source: sampler 0 is the source texture, sampler 1 is the palette manager's palette texture.
+		std::array<rhi::TextureBinding, 2> tbs = {
+			{{rhi::SamplerName::kSampler0, texture_}, {rhi::SamplerName::kSampler1, palette_mgr_->palette()}}};
+		binding_set_ = rhi.create_binding_set(ctx, pipeline_, {vbs, tbs});
+	}
+	else
+	{
+		std::array<rhi::TextureBinding, 1> tbs = {{{rhi::SamplerName::kSampler0, texture_}}};
+		binding_set_ = rhi.create_binding_set(ctx, pipeline_, {vbs, tbs});
+	}
+/// Color passed to begin_render_pass when rendering into an explicit output texture.
+static constexpr const rhi::Color kClearColor = {0, 0, 0, 1};
+/// @brief Draw the quad into output_ when one is set, otherwise into the default render pass.
+/// @param rhi rendering interface
+/// @param ctx graphics context the draw is recorded against
+void BlitRectPass::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+	if (output_)
+	{
+		rhi.begin_render_pass(ctx, {render_pass_, output_, std::nullopt, kClearColor});
+	}
+	else
+	{
+		rhi.begin_default_render_pass(ctx, output_clear_);
+	}
+	rhi.bind_pipeline(ctx, pipeline_);
+	// The viewport is only set explicitly for an output texture; the default render pass keeps its own.
+	if (output_)
+	{
+		rhi.set_viewport(ctx, {0, 0, output_width_, output_height_});
+	}
+	rhi.bind_uniform_set(ctx, 0, uniform_sets_[0]);
+	rhi.bind_uniform_set(ctx, 1, uniform_sets_[1]);
+	rhi.bind_binding_set(ctx, binding_set_);
+	rhi.bind_index_buffer(ctx, quad_ibo_);
+	// 6 indices: the two triangles listed in kIndices.
+	rhi.draw_indexed(ctx, 6, 0);
+	rhi.end_render_pass(ctx);
+void BlitRectPass::postpass(Rhi& rhi)
diff --git a/src/hwr2/pass_blit_rect.hpp b/src/hwr2/pass_blit_rect.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d828812cd78c346105f54c78bab673a932c583a4
--- /dev/null
+++ b/src/hwr2/pass_blit_rect.hpp
@@ -0,0 +1,91 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include <array>
+#include "../rhi/rhi.hpp"
+#include "pass.hpp"
+#include "pass_resource_managers.hpp"
+namespace srb2::hwr2
+/// @brief A render pass which blits a rect using a source texture or textures.
+/// The source is configured with set_texture() and the destination with set_output(); when no output
+/// texture is set the pass draws into the default render pass.
+class BlitRectPass final : public Pass
+	rhi::Handle<rhi::Pipeline> pipeline_;
+	// Source texture and its dimensions (the dimensions are only used for aspect correction).
+	rhi::Handle<rhi::Texture> texture_;
+	uint32_t texture_width_ = 0;
+	uint32_t texture_height_ = 0;
+	// Destination color texture and its dimensions.
+	rhi::Handle<rhi::Texture> output_;
+	uint32_t output_width_ = 0;
+	uint32_t output_height_ = 0;
+	// When true, the quad is scaled by the source and output aspect ratios.
+	bool output_correct_aspect_ = false;
+	// Selects clear vs. load for the render pass, and is forwarded to the default render pass.
+	bool output_clear_ = true;
+	// When true, the V scale of the texcoord transform is negated.
+	bool output_flip_ = false;
+	rhi::Handle<rhi::RenderPass> render_pass_;
+	rhi::Handle<rhi::Buffer> quad_vbo_;
+	rhi::Handle<rhi::Buffer> quad_ibo_;
+	// [0] holds the projection, [1] holds the model-view and texcoord transform.
+	std::array<rhi::Handle<rhi::UniformSet>, 2> uniform_sets_;
+	rhi::Handle<rhi::BindingSet> binding_set_;
+	// Set when a buffer is created in prepass; cleared once transfer has uploaded its contents.
+	bool quad_vbo_needs_upload_ = false;
+	bool quad_ibo_needs_upload_ = false;
+	// The presence of a palette manager indicates that the source texture will be paletted. This can't be changed.
+	std::shared_ptr<MainPaletteManager> palette_mgr_;
+	BlitRectPass();
+	BlitRectPass(bool output_clear);
+	BlitRectPass(const std::shared_ptr<MainPaletteManager>& palette_mgr, bool output_clear);
+	virtual ~BlitRectPass();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+	/// @brief Set the next blit texture. Don't call during graphics phase!
+	/// @param texture the texture to use when blitting
+	/// @param width   texture width
+	/// @param height  texture height
+	void set_texture(rhi::Handle<rhi::Texture> texture, uint32_t width, uint32_t height) noexcept
+	{
+		texture_ = texture;
+		texture_width_ = width;
+		texture_height_ = height;
+	}
+	/// @brief Set the next output texture. Don't call during graphics phase!
+	/// @param color          the texture to use as a color buffer
+	/// @param width          output texture width
+	/// @param height         output texture height
+	/// @param correct_aspect whether to scale the quad by the source and output aspect ratios
+	/// @param flip           whether to negate the V scale of the texture coordinate transform
+	void set_output(
+		rhi::Handle<rhi::Texture> color,
+		uint32_t width,
+		uint32_t height,
+		bool correct_aspect,
+		bool flip
+	) noexcept
+	{
+		output_ = color;
+		output_width_ = width;
+		output_height_ = height;
+		output_correct_aspect_ = correct_aspect;
+		output_flip_ = flip;
+	}
+} // namespace srb2::hwr2
+#endif // __SRB2_HWR2_PASS_BLIT_RECT_HPP__
diff --git a/src/hwr2/pass_imgui.cpp b/src/hwr2/pass_imgui.cpp
index 6d8e028618011089bade0d79dcdd1f636975168b..e0d1d3cb6e00f715ce5aaaaeb2844c77c9b8aeff 100644
--- a/src/hwr2/pass_imgui.cpp
+++ b/src/hwr2/pass_imgui.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "pass_imgui.hpp"
 #include <imgui.h>
@@ -8,48 +17,32 @@ using namespace srb2;
 using namespace srb2::hwr2;
 using namespace srb2::rhi;
-static const PipelineDesc kPipelineDesc =
+static const PipelineDesc kPipelineDesc = {
-	{
-		{
-			{sizeof(ImDrawVert)}
-		},
-		{
-			{VertexAttributeName::kPosition, 0, 0},
-			{VertexAttributeName::kTexCoord0, 0, 12},
-			{VertexAttributeName::kColor, 0, 24}
-		}
-	},
-	{{
-		{{UniformName::kProjection}},
-		{{UniformName::kModelView, UniformName::kTexCoord0Transform}}
-	}},
-	{{
-		SamplerName::kSampler0
-	}},
-	PipelineDepthAttachmentDesc {
-		PixelFormat::kDepth16,
-		CompareFunc::kAlways,
-		true
-	},
-	{
-		PixelFormat::kRGBA8,
-		BlendDesc {
-			BlendFactor::kSourceAlpha,
-			BlendFactor::kOneMinusSourceAlpha,
-			BlendFunction::kAdd,
-			BlendFactor::kOne,
-			BlendFactor::kOneMinusSourceAlpha,
-			BlendFunction::kAdd
-		},
-		{true, true, true, true}
-	},
+	{{{sizeof(ImDrawVert)}},
+	 {{VertexAttributeName::kPosition, 0, 0},
+	  {VertexAttributeName::kTexCoord0, 0, 12},
+	  {VertexAttributeName::kColor, 0, 24}}},
+	{{{{UniformName::kProjection}}, {{UniformName::kModelView, UniformName::kTexCoord0Transform}}}},
+	{{SamplerName::kSampler0}},
+	PipelineDepthAttachmentDesc {PixelFormat::kDepth16, CompareFunc::kAlways, true},
+	{PixelFormat::kRGBA8,
+	 BlendDesc {
+		 BlendFactor::kSourceAlpha,
+		 BlendFactor::kOneMinusSourceAlpha,
+		 BlendFunction::kAdd,
+		 BlendFactor::kOne,
+		 BlendFactor::kOneMinusSourceAlpha,
+		 BlendFunction::kAdd},
+	 {true, true, true, true}},
-	{0.f, 0.f, 0.f, 1.f}
+	{0.f, 0.f, 0.f, 1.f}};
+ImguiPass::ImguiPass() : Pass()
 ImguiPass::~ImguiPass() = default;
@@ -86,18 +79,10 @@ void ImguiPass::prepass(Rhi& rhi)
 	for (auto list : draw_lists)
 		Handle<Buffer> vbo = rhi.create_buffer(
-			{
-				static_cast<uint32_t>(list->VtxBuffer.size_in_bytes()),
-				BufferType::kVertexBuffer,
-				BufferUsage::kImmutable
-			}
+			{static_cast<uint32_t>(list->VtxBuffer.size_in_bytes()), BufferType::kVertexBuffer, BufferUsage::kImmutable}
 		Handle<Buffer> ibo = rhi.create_buffer(
-			{
-				static_cast<uint32_t>(list->IdxBuffer.size_in_bytes()),
-				BufferType::kIndexBuffer,
-				BufferUsage::kImmutable
-			}
+			{static_cast<uint32_t>(list->IdxBuffer.size_in_bytes()), BufferType::kIndexBuffer, BufferUsage::kImmutable}
 		DrawList hwr2_list;
@@ -126,13 +111,11 @@ void ImguiPass::prepass(Rhi& rhi)
 			draw_cmd.v_offset = cmd.VtxOffset;
 			draw_cmd.i_offset = cmd.IdxOffset;
 			draw_cmd.elems = cmd.ElemCount;
-			draw_cmd.clip =
-			{
+			draw_cmd.clip = {
 				static_cast<int32_t>((data->DisplaySize.y * data->FramebufferScale.y) - clip_max.y),
 				static_cast<uint32_t>(clip_max.x - clip_min.x),
-				static_cast<uint32_t>(clip_max.y - clip_min.y)
-			};
+				static_cast<uint32_t>(clip_max.y - clip_min.y)};
@@ -179,35 +162,20 @@ void ImguiPass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
 		rhi.update_buffer_contents(ctx, ibo, 0, tcb::as_bytes(index_span));
 		// Uniform sets
-		std::array<UniformVariant, 1> g1_uniforms =
-		{{
+		std::array<UniformVariant, 1> g1_uniforms = {{
 			// Projection
-			std::array<std::array<float, 4>, 4>
-			{{
-				{2.f / vid.realwidth, 0.f, 0.f, 0.f},
-				{0.f, 2.f / vid.realheight, 0.f, 0.f},
-				{0.f, 0.f, 1.f, 0.f},
-				{-1.f, 1.f, 0.f, 1.f}
-			}},
-		}};
-		std::array<UniformVariant, 2> g2_uniforms =
-		{{
-			// ModelView
-			std::array<std::array<float, 4>, 4>
-			{{
-				{1.f, 0.f, 0.f, 0.f},
-				{0.f, -1.f, 0.f, 0.f},
-				{0.f, 0.f, 1.f, 0.f},
-				{0.f, 0, 0.f, 1.f}
-			}},
-			// Texcoord0 Transform
-			std::array<std::array<float, 3>, 3>
-			{{
-				{1.f, 0.f, 0.f},
-				{0.f, 1.f, 0.f},
-				{0.f, 0.f, 1.f}
-			}}
+			std::array<std::array<float, 4>, 4> {
+				{{2.f / vid.realwidth, 0.f, 0.f, 0.f},
+				 {0.f, 2.f / vid.realheight, 0.f, 0.f},
+				 {0.f, 0.f, 1.f, 0.f},
+				 {-1.f, 1.f, 0.f, 1.f}}},
+		std::array<UniformVariant, 2> g2_uniforms = {
+			{// ModelView
+			 std::array<std::array<float, 4>, 4> {
+				 {{1.f, 0.f, 0.f, 0.f}, {0.f, -1.f, 0.f, 0.f}, {0.f, 0.f, 1.f, 0.f}, {0.f, 0, 0.f, 1.f}}},
+			 // Texcoord0 Transform
+			 std::array<std::array<float, 3>, 3> {{{1.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}}}};
 		Handle<UniformSet> us_1 = rhi.create_uniform_set(ctx, {g1_uniforms});
 		Handle<UniformSet> us_2 = rhi.create_uniform_set(ctx, {g2_uniforms});
diff --git a/src/hwr2/pass_imgui.hpp b/src/hwr2/pass_imgui.hpp
index 280e7fc9c08aa0e591e6fd69ace3ed0e19b30e00..91d2afe20192d88af9618f554dd41a85c24db3a0 100644
--- a/src/hwr2/pass_imgui.hpp
+++ b/src/hwr2/pass_imgui.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_HWR2_PASS_IMGUI_HPP__
 #define __SRB2_HWR2_PASS_IMGUI_HPP__
@@ -9,7 +18,7 @@
 namespace srb2::hwr2
-class ImguiPass : public Pass
+class ImguiPass final : public Pass
 	struct DrawCmd
@@ -36,6 +45,7 @@ class ImguiPass : public Pass
 	std::vector<DrawList> draw_lists_;
+	ImguiPass();
 	virtual ~ImguiPass();
 	virtual void prepass(rhi::Rhi& rhi) override;
diff --git a/src/hwr2/pass_manager.cpp b/src/hwr2/pass_manager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e79ef272506d26c2a8eb60b755807c494d1f0012
--- /dev/null
+++ b/src/hwr2/pass_manager.cpp
@@ -0,0 +1,169 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass_manager.hpp"
+using namespace srb2;
+using namespace srb2::hwr2;
+using namespace srb2::rhi;
+/// @brief A Pass whose prepass and postpass phases call caller-supplied callbacks.
+/// The transfer and graphics phases are declared but take no named arguments in their definitions.
+class LambdaPass final : public Pass
+	// Manager passed to the callbacks; held as a raw pointer.
+	PassManager* mgr_;
+	// Called from prepass() when non-empty.
+	std::function<void(PassManager&, rhi::Rhi&)> prepass_func_;
+	// Called from postpass() when non-empty; left empty by the two-argument constructor.
+	std::function<void(PassManager&, rhi::Rhi&)> postpass_func_;
+	LambdaPass(PassManager* mgr, std::function<void(PassManager&, rhi::Rhi&)> prepass_func);
+	LambdaPass(
+		PassManager* mgr,
+		std::function<void(PassManager&, rhi::Rhi&)> prepass_func,
+		std::function<void(PassManager&, rhi::Rhi&)> postpass_func
+	);
+	virtual ~LambdaPass();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+} // namespace
+/// Construct a pass with only a prepass callback; postpass_func_ is left empty.
+LambdaPass::LambdaPass(PassManager* mgr, std::function<void(PassManager&, rhi::Rhi&)> prepass_func)
+	: mgr_(mgr), prepass_func_(prepass_func)
+/// Constructor taking both a prepass and a postpass callback (parameters and initializer list below).
+	PassManager* mgr,
+	std::function<void(PassManager&, rhi::Rhi&)> prepass_func,
+	std::function<void(PassManager&, rhi::Rhi&)> postpass_func
+	: mgr_(mgr), prepass_func_(prepass_func), postpass_func_(postpass_func)
+LambdaPass::~LambdaPass() = default;
+/// @brief Invoke the prepass callback, if one was supplied, with the owning manager and the Rhi.
+/// @param rhi rendering interface forwarded to the callback
+void LambdaPass::prepass(Rhi& rhi)
+	if (prepass_func_)
+	{
+		(prepass_func_)(*mgr_, rhi);
+	}
+void LambdaPass::transfer(Rhi&, Handle<TransferContext>)
+void LambdaPass::graphics(Rhi&, Handle<GraphicsContext>)
+/// @brief Invoke the postpass callback, if one was supplied, with the owning manager and the Rhi.
+/// @param rhi rendering interface forwarded to the callback
+void LambdaPass::postpass(Rhi& rhi)
+	if (postpass_func_)
+	{
+		(postpass_func_)(*mgr_, rhi);
+	}
+PassManager::PassManager() = default;
+/// @brief Append a pass to the end of the pass list, enabled, and index it by name.
+/// @param name unique name for the pass; asserting that it is not already registered
+/// @param pass the pass to run; the manager keeps a shared reference to it
+void PassManager::insert(const std::string& name, std::shared_ptr<Pass> pass)
+	SRB2_ASSERT(pass_by_name_.find(name) == pass_by_name_.end());
+	// The name maps to the position the pass is about to occupy in passes_.
+	std::size_t index = passes_.size();
+	passes_.push_back(PassManagerEntry {name, pass, true});
+	pass_by_name_.insert({name, index});
+/// @brief Register a pass that only runs a callback during the prepass phase.
+/// @param name         unique name for the pass
+/// @param prepass_func callback invoked with this manager and the Rhi during prepass
+void PassManager::insert(const std::string& name, std::function<void(PassManager&, Rhi&)> prepass_func)
+{
+	// Construct the LambdaPass directly inside the shared allocation instead of building a temporary
+	// and copying it (and its std::function member) a second time.
+	insert(name, std::make_shared<LambdaPass>(this, prepass_func));
+}
+/// @brief Register a pass that runs one callback during prepass and another during postpass.
+/// @param name          unique name for the pass
+/// @param prepass_func  callback invoked with this manager and the Rhi during prepass
+/// @param postpass_func callback invoked with this manager and the Rhi during postpass
+void PassManager::insert(
+	const std::string& name,
+	std::function<void(PassManager&, Rhi&)> prepass_func,
+	std::function<void(PassManager&, Rhi&)> postpass_func
+)
+{
+	// Construct the LambdaPass directly inside the shared allocation instead of building a temporary
+	// and copying it (and both std::function members) a second time.
+	insert(name, std::make_shared<LambdaPass>(this, prepass_func, postpass_func));
+}
+/// @brief Enable or disable a previously inserted pass.
+/// @param name    name the pass was inserted under; asserting that it is registered
+/// @param enabled whether PassManager::render should run the pass
+void PassManager::set_pass_enabled(const std::string& name, bool enabled)
+{
+	// Look the name up once. operator[] would insert an entry mapping an unknown name to index 0
+	// whenever the assertion does not stop execution, and then modify whatever pass sits at index 0.
+	auto itr = pass_by_name_.find(name);
+	SRB2_ASSERT(itr != pass_by_name_.end());
+	if (itr == pass_by_name_.end())
+	{
+		return;
+	}
+	passes_[itr->second].enabled = enabled;
+}
+/// @brief Look up a pass by the name it was inserted under.
+/// @param name the pass name
+/// @return a weak reference to the pass, or an empty weak_ptr when no pass has that name
+std::weak_ptr<Pass> PassManager::for_name(const std::string& name)
+	auto itr = pass_by_name_.find(name);
+	if (itr == pass_by_name_.end())
+	{
+		return std::weak_ptr<Pass>();
+	}
+	return passes_[itr->second].pass;
+/// @brief Run one frame: each phase is run across all enabled passes, in insertion order, before the next
+/// phase starts. Order: prepass, transfer (one shared transfer context), graphics (one shared graphics
+/// context), postpass. Does nothing when no passes are registered.
+/// @param rhi rendering interface handed to every pass
+// NOTE(review): the enabled flag is re-read in every phase, and LambdaPass callbacks receive this manager,
+// so a callback that toggles or inserts passes during render affects (or invalidates) the loops below -- confirm
+// callers do not insert from within a callback.
+void PassManager::render(Rhi& rhi) const
+	if (passes_.empty())
+	{
+		return;
+	}
+	for (auto& pass : passes_)
+	{
+		if (pass.enabled)
+		{
+			pass.pass->prepass(rhi);
+		}
+	}
+	// All transfer work from every pass is recorded into a single transfer context.
+	Handle<TransferContext> tc = rhi.begin_transfer();
+	for (auto& pass : passes_)
+	{
+		if (pass.enabled)
+		{
+			pass.pass->transfer(rhi, tc);
+		}
+	}
+	rhi.end_transfer(tc);
+	// Likewise, all draw work is recorded into a single graphics context.
+	Handle<GraphicsContext> gc = rhi.begin_graphics();
+	for (auto& pass : passes_)
+	{
+		if (pass.enabled)
+		{
+			pass.pass->graphics(rhi, gc);
+		}
+	}
+	rhi.end_graphics(gc);
+	for (auto& pass : passes_)
+	{
+		if (pass.enabled)
+		{
+			pass.pass->postpass(rhi);
+		}
+	}
diff --git a/src/hwr2/pass_manager.hpp b/src/hwr2/pass_manager.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..372db893ec3e90b655484485dab550b36bb0f88b
--- /dev/null
+++ b/src/hwr2/pass_manager.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "../rhi/rhi.hpp"
+#include "pass.hpp"
+namespace srb2::hwr2
+/// @brief Owns an ordered list of named passes and runs them phase by phase in render().
+/// The manager is neither copyable nor movable.
+class PassManager
+	struct PassManagerEntry
+	{
+		std::string name;
+		std::shared_ptr<Pass> pass;
+		// Disabled passes are skipped in every phase of render().
+		bool enabled;
+	};
+	// Maps a pass name to its index in passes_.
+	std::unordered_map<std::string, std::size_t> pass_by_name_;
+	// Passes in insertion order, which is also the order they run in.
+	std::vector<PassManagerEntry> passes_;
+	PassManager();
+	PassManager(const PassManager&) = delete;
+	PassManager(PassManager&&) = delete;
+	PassManager& operator=(const PassManager&) = delete;
+	PassManager& operator=(PassManager&&) = delete;
+	/// @brief Append a pass under a unique name; it starts enabled.
+	void insert(const std::string& name, std::shared_ptr<Pass> pass);
+	/// @brief Append a pass that only runs the given callback during prepass.
+	void insert(const std::string& name, std::function<void(PassManager&, rhi::Rhi&)> prepass_func);
+	/// @brief Append a pass that runs the given callbacks during prepass and postpass.
+	void insert(
+		const std::string& name,
+		std::function<void(PassManager&, rhi::Rhi&)> prepass_func,
+		std::function<void(PassManager&, rhi::Rhi&)> postpass_func
+	);
+	/// @brief Find a pass by name; the result is empty when the name is unknown.
+	std::weak_ptr<Pass> for_name(const std::string& name);
+	/// @brief Enable or disable the pass registered under name.
+	void set_pass_enabled(const std::string& name, bool enabled);
+	/// @brief Run all enabled passes through the prepass, transfer, graphics and postpass phases.
+	void render(rhi::Rhi& rhi) const;
+} // namespace srb2::hwr2
+#endif // __SRB2_HWR2_PASS_MANAGER_HPP__
diff --git a/src/hwr2/pass_postprocess.cpp b/src/hwr2/pass_postprocess.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..89b1ae0051ef627473ac1082aa9a9cad47be0635
--- /dev/null
+++ b/src/hwr2/pass_postprocess.cpp
@@ -0,0 +1,217 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass_postprocess.hpp"
+#include <string>
+#include <fmt/format.h>
+#include <tcb/span.hpp>
+#include "../f_finale.h"
+#include "../w_wad.h"
+using namespace srb2;
+using namespace srb2::hwr2;
+using namespace srb2::rhi;
+// Vertex layout used by the postprocess quad: a position (x, y, z) followed by a texture coordinate (u, v).
+// All five members are floats, so the texture coordinate starts at byte offset 12.
+struct PostprocessVertex
+	float x;
+	float y;
+	float z;
+	float u;
+	float v;
+// Four corners of a quad spanning -0.5..0.5 on x and y (z = 0), with texture coordinates spanning 0..1.
+static const PostprocessVertex kPostprocessVerts[] =
+	{{-.5f, -.5f, 0.f, 0.f, 0.f}, {.5f, -.5f, 0.f, 1.f, 0.f}, {-.5f, .5f, 0.f, 0.f, 1.f}, {.5f, .5f, 0.f, 1.f, 1.f}};
+// Two triangles (0, 1, 2) and (1, 3, 2) covering the quad above.
+static const uint16_t kPostprocessIndices[] = {0, 1, 2, 1, 3, 2};
+} // namespace
+// Pipeline description for the wipe postprocess draw.
+// It selects the kPostprocessWipe program, a vertex input made of PostprocessVertex-sized elements with the
+// position attribute at byte offset 0 and the first texture coordinate at byte offset 12, a single uniform group
+// holding the projection matrix, and two samplers (bound in transfer() to the source texture and the wipe mask).
+// NOTE(review): the remaining positional fields are not named here; see PipelineDesc in rhi.hpp for their meaning.
+static const PipelineDesc kWipePipelineDesc = {
+	PipelineProgram::kPostprocessWipe,
+	{{{sizeof(PostprocessVertex)}},
+	 {
+		 {VertexAttributeName::kPosition, 0, 0},
+		 {VertexAttributeName::kTexCoord0, 0, 12},
+	 }},
+	{{{{UniformName::kProjection}}}},
+	{{SamplerName::kSampler0, SamplerName::kSampler1}},
+	std::nullopt,
+	{PixelFormat::kRGBA8, std::nullopt, {true, true, true, true}},
+	PrimitiveType::kTriangles,
+	CullMode::kNone,
+	FaceWinding::kCounterClockwise,
+	{0.f, 0.f, 0.f, 1.f}};
+PostprocessWipePass::PostprocessWipePass() : Pass()
+PostprocessWipePass::~PostprocessWipePass() = default;
+/// @brief Create the long-lived RHI objects on first use, then load the wipe mask for the current wipe state.
+///
+/// Reads the globals g_wipetype, g_wipeframe and g_wipereverse. When a matching mask lump of a recognised size
+/// exists, its bytes are read into mask_data_ and a texture for it is created in wipe_tex_. In every early-return
+/// case below wipe_tex_ is left unchanged.
+void PostprocessWipePass::prepass(Rhi& rhi)
+	// The render pass, pipeline and quad buffers are created once and kept.
+	if (!render_pass_)
+	{
+		render_pass_ = rhi.create_render_pass(
+			{std::nullopt, PixelFormat::kRGBA8, AttachmentLoadOp::kLoad, AttachmentStoreOp::kStore}
+		);
+	}
+	if (!pipeline_)
+	{
+		pipeline_ = rhi.create_pipeline(kWipePipelineDesc);
+	}
+	if (!vbo_)
+	{
+		vbo_ = rhi.create_buffer({sizeof(PostprocessVertex) * 4, BufferType::kVertexBuffer, BufferUsage::kImmutable});
+		// The contents are uploaded later, in transfer().
+		upload_vbo_ = true;
+	}
+	if (!ibo_)
+	{
+		// 6 indices of 2 bytes each (uint16_t).
+		ibo_ = rhi.create_buffer({2 * 6, BufferType::kIndexBuffer, BufferUsage::kImmutable});
+		upload_ibo_ = true;
+	}
+	uint32_t wipe_type = g_wipetype;
+	uint32_t wipe_frame = g_wipeframe;
+	bool wipe_reverse = g_wipereverse;
+	// The lump name only has room for two decimal digits each of type and frame.
+	if (wipe_type >= 100 || wipe_frame >= 100)
+	{
+		return;
+	}
+	std::string lumpname = fmt::format(FMT_STRING("FADE{:02d}{:02d}"), wipe_type, wipe_frame);
+	lumpnum_t mask_lump = W_CheckNumForName(lumpname.c_str());
+	if (mask_lump == LUMPERROR)
+	{
+		return;
+	}
+	// The mask dimensions are derived from the lump's byte length (width * height, one byte per pixel).
+	// Any other length is rejected.
+	std::size_t mask_lump_size = W_LumpLength(mask_lump);
+	switch (mask_lump_size)
+	{
+	case 256000:
+		mask_w_ = 640;
+		mask_h_ = 400;
+		break;
+	case 64000:
+		mask_w_ = 320;
+		mask_h_ = 200;
+		break;
+	case 16000:
+		mask_w_ = 160;
+		mask_h_ = 100;
+		break;
+	case 4000:
+		mask_w_ = 80;
+		mask_h_ = 50;
+		break;
+	default:
+		return;
+	}
+	mask_data_.clear();
+	mask_data_.resize(mask_lump_size, 0);
+	W_ReadLump(mask_lump, mask_data_.data());
+	if (wipe_reverse)
+	{
+		// A reversed wipe replaces every mask byte b with 32 - b.
+		for (auto& b : mask_data_)
+		{
+			b = 32 - b;
+		}
+	}
+	// The texture is only created here; its contents are uploaded in transfer() and it is destroyed in postpass().
+	wipe_tex_ = rhi.create_texture({TextureFormat::kLuminance, mask_w_, mask_h_});
+/// @brief Upload the quad buffers (when flagged) and the wipe mask, then create the uniform and binding sets.
+///
+/// Does nothing when there is no wipe mask texture or no source texture has been set.
+void PostprocessWipePass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+	if (wipe_tex_ == kNullHandle)
+	{
+		return;
+	}
+	if (source_ == kNullHandle)
+	{
+		return;
+	}
+	// The upload flags are set in prepass() when the buffers are created, so each buffer is filled only once.
+	if (upload_vbo_)
+	{
+		rhi.update_buffer_contents(ctx, vbo_, 0, tcb::as_bytes(tcb::span(kPostprocessVerts)));
+		upload_vbo_ = false;
+	}
+	if (upload_ibo_)
+	{
+		rhi.update_buffer_contents(ctx, ibo_, 0, tcb::as_bytes(tcb::span(kPostprocessIndices)));
+		upload_ibo_ = false;
+	}
+	// Upload the mask bytes prepared by prepass() into the mask texture.
+	tcb::span<const std::byte> data = tcb::as_bytes(tcb::span(mask_data_));
+	rhi.update_texture(ctx, wipe_tex_, {0, 0, mask_w_, mask_h_}, PixelFormat::kR8, data);
+	// The projection scales x and y by 2, which stretches the -0.5..0.5 quad to -1..1.
+	UniformVariant uniforms[] = {
+		{// Projection
+		 std::array<std::array<float, 4>, 4> {
+			 {{2.f, 0.f, 0.f, 0.f}, {0.f, 2.f, 0.f, 0.f}, {0.f, 0.f, 1.f, 0.f}, {0.f, 0.f, 0.f, 1.f}}}}};
+	us_ = rhi.create_uniform_set(ctx, {tcb::span(uniforms)});
+	// Sampler 0 reads the source image and sampler 1 reads the wipe mask.
+	VertexAttributeBufferBinding vbos[] = {{0, vbo_}};
+	TextureBinding tx[] = {{SamplerName::kSampler0, source_}, {SamplerName::kSampler1, wipe_tex_}};
+	bs_ = rhi.create_binding_set(ctx, pipeline_, {vbos, tx});
+/// @brief Draw the wipe quad into the target texture, or into the default render pass when no target is set.
+///
+/// Does nothing when there is no wipe mask texture or no source texture. These are the same conditions under
+/// which transfer() returns before creating the uniform and binding sets used here.
+void PostprocessWipePass::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+	if (wipe_tex_ == kNullHandle)
+	{
+		return;
+	}
+	// transfer() skips creating us_ and bs_ when there is no source texture, so there is nothing valid to draw with.
+	if (source_ == kNullHandle)
+	{
+		return;
+	}
+	if (target_)
+	{
+		rhi.begin_render_pass(ctx, {render_pass_, target_, std::nullopt, {0, 0, 0, 1}});
+	}
+	else
+	{
+		rhi.begin_default_render_pass(ctx, false);
+	}
+	rhi.bind_pipeline(ctx, pipeline_);
+	// The viewport is only set explicitly when rendering into a target texture of known size.
+	if (target_)
+	{
+		rhi.set_viewport(ctx, {0, 0, target_w_, target_h_});
+	}
+	rhi.bind_uniform_set(ctx, 0, us_);
+	rhi.bind_binding_set(ctx, bs_);
+	rhi.bind_index_buffer(ctx, ibo_);
+	// 6 indices: the two triangles of the quad.
+	rhi.draw_indexed(ctx, 6, 0);
+	rhi.end_render_pass(ctx);
+/// @brief Release the per-frame wipe mask: destroy the mask texture created in prepass() and drop the mask bytes.
+void PostprocessWipePass::postpass(Rhi& rhi)
+	if (wipe_tex_)
+	{
+		rhi.destroy_texture(wipe_tex_);
+		wipe_tex_ = kNullHandle;
+	}
+	mask_data_.clear();
diff --git a/src/hwr2/pass_postprocess.hpp b/src/hwr2/pass_postprocess.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..113d96296c59fa92844bc1cf732ce0b675d4bd0b
--- /dev/null
+++ b/src/hwr2/pass_postprocess.hpp
@@ -0,0 +1,71 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass.hpp"
+#include <vector>
+namespace srb2::hwr2
+/// @brief Pass that draws a source texture through a wipe mask, into a target texture or the default render pass.
+class PostprocessWipePass final : public Pass
+	// RHI objects created on first prepass and then kept.
+	rhi::Handle<rhi::RenderPass> render_pass_;
+	rhi::Handle<rhi::Pipeline> pipeline_;
+	rhi::Handle<rhi::Buffer> vbo_;
+	// Set when vbo_ is created; cleared once its contents have been uploaded in transfer.
+	bool upload_vbo_ = false;
+	rhi::Handle<rhi::Buffer> ibo_;
+	// Set when ibo_ is created; cleared once its contents have been uploaded in transfer.
+	bool upload_ibo_ = false;
+	// Uniform and binding sets created in transfer and bound in graphics.
+	rhi::Handle<rhi::UniformSet> us_;
+	rhi::Handle<rhi::BindingSet> bs_;
+	// Wipe mask texture; created in prepass and destroyed in postpass.
+	rhi::Handle<rhi::Texture> wipe_tex_;
+	// Source texture and the size given to set_source().
+	rhi::Handle<rhi::Texture> source_;
+	uint32_t source_w_ = 0;
+	uint32_t source_h_ = 0;
+	// Texture given to set_end(). NOTE(review): not read by the pass_postprocess.cpp code in this change.
+	rhi::Handle<rhi::Texture> end_;
+	// Render target and its size; when no target is set, graphics uses the default render pass.
+	rhi::Handle<rhi::Texture> target_;
+	uint32_t target_w_ = 0;
+	uint32_t target_h_ = 0;
+	// Bytes of the current wipe mask lump and the dimensions derived from its length.
+	std::vector<uint8_t> mask_data_;
+	uint32_t mask_w_ = 0;
+	uint32_t mask_h_ = 0;
+	PostprocessWipePass();
+	virtual ~PostprocessWipePass();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+	/// @brief Set the texture the wipe reads from, together with its size.
+	void set_source(rhi::Handle<rhi::Texture> source, uint32_t width, uint32_t height) noexcept
+	{
+		source_ = source;
+		source_w_ = width;
+		source_h_ = height;
+	}
+	/// @brief Store the given texture in end_.
+	void set_end(rhi::Handle<rhi::Texture> end) noexcept { end_ = end; }
+	/// @brief Set the texture the wipe renders into, together with its size (used for the viewport).
+	void set_target(rhi::Handle<rhi::Texture> target, uint32_t width, uint32_t height) noexcept
+	{
+		target_ = target;
+		target_w_ = width;
+		target_h_ = height;
+	}
+} // namespace srb2::hwr2
diff --git a/src/hwr2/pass_resource_managers.cpp b/src/hwr2/pass_resource_managers.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2f5e599c4743bd34f19c5b95b83f31e32d22447a
--- /dev/null
+++ b/src/hwr2/pass_resource_managers.cpp
@@ -0,0 +1,236 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass_resource_managers.hpp"
+#include <algorithm>
+#include <cmath>
+#include "../v_video.h"
+#include "../z_zone.h"
+using namespace srb2;
+using namespace srb2::hwr2;
+using namespace srb2::rhi;
+FramebufferManager::FramebufferManager() : Pass()
+FramebufferManager::~FramebufferManager() = default;
+/// @brief Keep the framebuffer textures and depth renderbuffer in sync with the current video size.
+///
+/// When vid.width / vid.height differ from the stored size, every existing attachment is destroyed; any
+/// attachment that is missing afterwards (including on the very first call) is created at the current size.
+void FramebufferManager::prepass(Rhi& rhi)
+	uint32_t current_width = vid.width;
+	uint32_t current_height = vid.height;
+	// Destroy the framebuffer textures if they exist and the video size changed
+	if (width_ != current_width || height_ != current_height)
+	{
+		if (main_colors_[0] != kNullHandle)
+		{
+			rhi.destroy_texture(main_colors_[0]);
+			main_colors_[0] = kNullHandle;
+		}
+		if (main_colors_[1] != kNullHandle)
+		{
+			rhi.destroy_texture(main_colors_[1]);
+			main_colors_[1] = kNullHandle;
+		}
+		if (main_depth_ != kNullHandle)
+		{
+			rhi.destroy_renderbuffer(main_depth_);
+			main_depth_ = kNullHandle;
+		}
+		if (post_colors_[0] != kNullHandle)
+		{
+			rhi.destroy_texture(post_colors_[0]);
+			post_colors_[0] = kNullHandle;
+		}
+		if (post_colors_[1] != kNullHandle)
+		{
+			rhi.destroy_texture(post_colors_[1]);
+			post_colors_[1] = kNullHandle;
+		}
+	}
+	// Remember the size the attachments below are created with.
+	width_ = current_width;
+	height_ = current_height;
+	// Recreate the framebuffer textures
+	if (main_colors_[0] == kNullHandle)
+	{
+		main_colors_[0] = rhi.create_texture({TextureFormat::kRGBA, current_width, current_height});
+	}
+	if (main_colors_[1] == kNullHandle)
+	{
+		main_colors_[1] = rhi.create_texture({TextureFormat::kRGBA, current_width, current_height});
+	}
+	if (main_depth_ == kNullHandle)
+	{
+		main_depth_ = rhi.create_renderbuffer({PixelFormat::kDepth16, current_width, current_height});
+	}
+	if (post_colors_[0] == kNullHandle)
+	{
+		post_colors_[0] = rhi.create_texture({TextureFormat::kRGBA, current_width, current_height});
+	}
+	if (post_colors_[1] == kNullHandle)
+	{
+		post_colors_[1] = rhi.create_texture({TextureFormat::kRGBA, current_width, current_height});
+	}
+void FramebufferManager::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+void FramebufferManager::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+void FramebufferManager::postpass(Rhi& rhi)
+MainPaletteManager::MainPaletteManager() : Pass()
+MainPaletteManager::~MainPaletteManager() = default;
+/// @brief Create the 256x1 RGBA palette texture on first use.
+void MainPaletteManager::prepass(Rhi& rhi)
+	if (!palette_)
+	{
+		palette_ = rhi.create_texture({TextureFormat::kRGBA, 256, 1});
+	}
+/// @brief Upload all 256 palette colors to the palette texture. This runs on every call, with no change check.
+void MainPaletteManager::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+	// Collect the colors V_GetColor reports for indices 0..255 into one contiguous array for the upload.
+	std::array<byteColor_t, 256> palette_32;
+	for (std::size_t i = 0; i < 256; i++)
+	{
+		palette_32[i] = V_GetColor(i).s;
+	}
+	rhi.update_texture(ctx, palette_, {0, 0, 256, 1}, PixelFormat::kRGBA8, tcb::as_bytes(tcb::span(palette_32)));
+void MainPaletteManager::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+void MainPaletteManager::postpass(Rhi& rhi)
+/// @brief Compute the side length, in pixels, of the square flat stored in a lump.
+/// @param lump lump number of the flat
+/// @return the square root of the lump length, truncated to an integer, or 0 when the lump is empty or its
+/// length is not a power of two.
+static uint32_t get_flat_size(lumpnum_t lump)
+	std::size_t lumplength = W_LumpLength(lump);
+	if (lumplength == 0)
+	{
+		return 0;
+	}
+	if ((lumplength & (lumplength - 1)) != 0)
+	{
+		// Lump length is not a power of two and therefore not a flat.
+		return 0;
+	}
+	// One correctly-rounded square root instead of a log2/pow round trip: the result is exact for lengths that
+	// are an even power of two, so the truncation below cannot come out one too low.
+	// NOTE(review): lengths that are an odd power of two (e.g. 2048) pass the check above but are not square;
+	// as before, the truncated square root is returned for them.
+	uint32_t lumpsize = static_cast<uint32_t>(std::sqrt(static_cast<double>(lumplength)));
+	return lumpsize;
+FlatTextureManager::FlatTextureManager() : Pass()
+FlatTextureManager::~FlatTextureManager() = default;
+void FlatTextureManager::prepass(Rhi& rhi)
+/// @brief Upload the pixel data of every flat queued in to_upload_, then empty the queue.
+///
+/// Each palette index byte of the flat is expanded to two bytes (index, alpha) and uploaded as RG8.
+void FlatTextureManager::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+	// Scratch buffer reused for every flat in the queue.
+	std::vector<std::array<uint8_t, 2>> flat_data;
+	for (auto flat_lump : to_upload_)
+	{
+		flat_data.clear();
+		// NOTE(review): operator[] would insert a null handle for an unknown lump; the assert below guards that.
+		Handle<Texture> flat_texture = flats_[flat_lump];
+		SRB2_ASSERT(flat_texture != kNullHandle);
+		std::size_t lump_length = W_LumpLength(flat_lump);
+		uint32_t flat_size = get_flat_size(flat_lump);
+		flat_data.reserve(flat_size * flat_size);
+		const uint8_t* flat_memory = static_cast<const uint8_t*>(W_CacheLumpNum(flat_lump, PU_PATCH));
+		SRB2_ASSERT(flat_memory != nullptr);
+		tcb::span<const uint8_t> flat_bytes = tcb::span(flat_memory, lump_length);
+		for (const uint8_t index : flat_bytes)
+		{
+			// The alpha/green channel is set to 0 if it's index 247; this is not usually used but fake floors can be
+			// masked sometimes, so we need to treat it as transparent when rendering them.
+			// See https://zdoom.org/wiki/Palette for remarks on fake 247 transparency
+			flat_data.push_back({index, index == 247 ? static_cast<uint8_t>(0) : static_cast<uint8_t>(255)});
+		}
+		// A flat size of 1 would end up being 2 bytes, so we need 2 more bytes to be unpack-aligned on texture upload
+		// Any other size would implicitly be aligned.
+		// Sure hope nobody tries to load any flats that are too big for the gpu!
+		if (flat_size == 1)
+		{
+			flat_data.push_back({0, 0});
+		}
+		tcb::span<const std::byte> data_bytes = tcb::as_bytes(tcb::span(flat_data));
+		rhi.update_texture(ctx, flat_texture, {0, 0, flat_size, flat_size}, rhi::PixelFormat::kRG8, data_bytes);
+	}
+	to_upload_.clear();
+void FlatTextureManager::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+void FlatTextureManager::postpass(Rhi& rhi)
+/// @brief Return the texture already registered for a flat lump, or create one and queue the lump for upload.
+///
+/// A newly created texture is square, sized by get_flat_size(); its contents are only uploaded in transfer().
+Handle<Texture> FlatTextureManager::find_or_create_indexed(Rhi& rhi, lumpnum_t lump)
+	auto flat_itr = flats_.find(lump);
+	if (flat_itr != flats_.end())
+	{
+		return flat_itr->second;
+	}
+	uint32_t flat_size = get_flat_size(lump);
+	Handle<Texture> new_tex = rhi.create_texture({TextureFormat::kLuminanceAlpha, flat_size, flat_size});
+	flats_.insert({lump, new_tex});
+	to_upload_.push_back(lump);
+	return new_tex;
+/// @brief Return the texture registered for a flat lump, or kNullHandle when none exists. Never creates one.
+Handle<Texture> FlatTextureManager::find_indexed(lumpnum_t lump) const
+	auto flat_itr = flats_.find(lump);
+	if (flat_itr != flats_.end())
+	{
+		return flat_itr->second;
+	}
+	return kNullHandle;
diff --git a/src/hwr2/pass_resource_managers.hpp b/src/hwr2/pass_resource_managers.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4eee36b8c0b5eac7f4f2dfa93cce6284d02e0cc9
--- /dev/null
+++ b/src/hwr2/pass_resource_managers.hpp
@@ -0,0 +1,129 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include <array>
+#include <cstddef>
+#include <unordered_map>
+#include <vector>
+#include "pass.hpp"
+namespace srb2::hwr2
+/// @brief Pass that owns two "main" color textures, a depth renderbuffer and two "post" color textures, all
+/// sized to the current video mode, and tracks which texture of each pair is current.
+class FramebufferManager final : public Pass
+	std::array<rhi::Handle<rhi::Texture>, 2> main_colors_;
+	rhi::Handle<rhi::Renderbuffer> main_depth_;
+	std::array<rhi::Handle<rhi::Texture>, 2> post_colors_;
+	// Index (0 or 1) of the current texture within main_colors_ / post_colors_.
+	std::size_t main_index_ = 0;
+	std::size_t post_index_ = 0;
+	// Size the attachments were last created for.
+	std::size_t width_ = 0;
+	std::size_t height_ = 0;
+	// True until swap_post() is called after a reset_post(); see previous_post_color().
+	bool first_postprocess_ = true;
+	FramebufferManager();
+	virtual ~FramebufferManager();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+	/// @brief Swap the current and previous main colors.
+	void swap_main() noexcept { main_index_ = main_index_ == 0 ? 1 : 0; }
+	/// @brief Swap the current and previous postprocess FB textures. Use between pass prepass phases to alternate.
+	void swap_post() noexcept
+	{
+		post_index_ = post_index_ == 0 ? 1 : 0;
+		first_postprocess_ = false;
+	}
+	/// @brief Mark the next postprocess step as the first one, so previous_post_color() returns the main color.
+	void reset_post() noexcept { first_postprocess_ = true; }
+	rhi::Handle<rhi::Texture> current_main_color() const noexcept { return main_colors_[main_index_]; }
+	rhi::Handle<rhi::Renderbuffer> main_depth() const noexcept { return main_depth_; }
+	rhi::Handle<rhi::Texture> previous_main_color() const noexcept { return main_colors_[1 - main_index_]; }
+	rhi::Handle<rhi::Texture> current_post_color() const noexcept { return post_colors_[post_index_]; }
+	/// @brief The post texture that is not current, or the current main color while no swap_post() has happened
+	/// since the last reset_post().
+	rhi::Handle<rhi::Texture> previous_post_color() const noexcept
+	{
+		if (first_postprocess_)
+		{
+			return current_main_color();
+		}
+		return post_colors_[1 - post_index_];
+	};
+	std::size_t width() const noexcept { return width_; }
+	std::size_t height() const noexcept { return height_; }
+/// @brief Pass that owns a 256x1 RGBA texture holding the palette colors and re-uploads it in transfer.
+class MainPaletteManager final : public Pass
+	rhi::Handle<rhi::Texture> palette_;
+	MainPaletteManager();
+	virtual ~MainPaletteManager();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+	/// @brief Handle of the palette texture; null until the first prepass has created it.
+	rhi::Handle<rhi::Texture> palette() const noexcept { return palette_; }
+A note to the reader:
+RHI/HWR2's architecture is intentionally decoupled in a data-oriented design fashion. Hash map lookups might technically
+be slower than storing the RHI handle in a hypothetical Flat class object, but it frees us from worrying about the
+validity of a given Handle when the RHI instance changes -- and it _can_, because this is designed to allow multiple
+RHI backends -- because any given Pass must be disposed when the RHI changes. The implementation of I_FinishUpdate is
+such that if the RHI is not the same as before, all passes must be reconstructed, and so we don't have to worry about
+going around and resetting Handle references everywhere. If you're familiar with old GL, it's like decoupling GLmipmap_t
+from patch_t.
+/// @brief Manages textures corresponding to specific flats indexed by lump number.
+class FlatTextureManager final : public Pass
+	// Texture handle for each flat lump that has been requested so far.
+	std::unordered_map<lumpnum_t, rhi::Handle<rhi::Texture>> flats_;
+	// Lumps whose texture has been created but whose pixel data has not been uploaded yet; drained in transfer.
+	std::vector<lumpnum_t> to_upload_;
+	// NOTE(review): not referenced by the pass_resource_managers.cpp code in this change.
+	std::vector<rhi::Handle<rhi::Texture>> disposed_textures_;
+	FlatTextureManager();
+	virtual ~FlatTextureManager();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+	/// @brief Find the indexed texture for a given flat lump, or create one if it doesn't exist yet. Only call this
+	/// in prepass.
+	/// @param rhi the Rhi used to create the texture when one does not exist yet
+	/// @param flat_lump lump number of the flat
+	/// @return the existing or newly created texture handle; a new texture's contents are uploaded in transfer
+	rhi::Handle<rhi::Texture> find_or_create_indexed(rhi::Rhi& rhi, lumpnum_t flat_lump);
+	/// @brief Find the indexed texture for a given flat lump without creating one.
+	/// @param flat_lump lump number of the flat
+	/// @return the texture handle, or a null handle when no texture exists for the lump
+	rhi::Handle<rhi::Texture> find_indexed(lumpnum_t flat_lump) const;
+} // namespace srb2::hwr2
diff --git a/src/hwr2/pass_software.cpp b/src/hwr2/pass_software.cpp
index a94b40169b512b3d040e0980d3ae6b8af044118b..aca51a615fe613e00f58133da7778748c993c9d6 100644
--- a/src/hwr2/pass_software.cpp
+++ b/src/hwr2/pass_software.cpp
@@ -1,10 +1,17 @@
-#include "pass_software.hpp"
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
-#include <optional>
+#include "pass_software.hpp"
-#include <tcb/span.hpp>
+#include "../i_video.h"
+#include "../v_video.h"
-#include "../cxxutil.hpp"
 #include "../d_netcmd.h"
 #include "../discord.h"
@@ -13,82 +20,13 @@
 #include "../m_avrecorder.h"
 #include "../st_stuff.h"
 #include "../s_sound.h"
+#include "../st_stuff.h"
 #include "../v_video.h"
 using namespace srb2;
 using namespace srb2::hwr2;
 using namespace srb2::rhi;
-SoftwareBlitPass::~SoftwareBlitPass() = default;
-struct SwBlitVertex
-	float x = 0.f;
-	float y = 0.f;
-	float z = 0.f;
-	float u = 0.f;
-	float v = 0.f;
-} // namespace
-static const SwBlitVertex kVerts[] =
-	{-.5f, -.5f, 0.f, 0.f, 0.f},
-	{.5f, -.5f, 0.f, 1.f, 0.f},
-	{-.5f, .5f, 0.f, 0.f, 1.f},
-	{.5f, .5f, 0.f, 1.f, 1.f}
-static const uint16_t kIndices[] = {0, 1, 2, 1, 3, 2};
-static const PipelineDesc kPipelineDescription =
-	PipelineProgram::kUnshadedPaletted,
-	{
-		{
-			{sizeof(SwBlitVertex)}
-		},
-		{
-			{VertexAttributeName::kPosition, 0, 0},
-			{VertexAttributeName::kTexCoord0, 0, 12}
-		}
-	},
-	{{
-		{{UniformName::kProjection}},
-		{{UniformName::kModelView, UniformName::kTexCoord0Transform}}
-	}},
-	{{
-		// R8 index texture
-		SamplerName::kSampler0,
-		// 256x1 palette texture
-		SamplerName::kSampler1
-	}},
-	std::nullopt,
-	{
-		PixelFormat::kRGBA8,
-		std::nullopt,
-		{true, true, true, true}
-	},
-	PrimitiveType::kTriangles,
-	CullMode::kNone,
-	FaceWinding::kCounterClockwise,
-	{0.f, 0.f, 0.f, 1.f}
-static uint32_t next_pow_of_2(uint32_t in)
-	in--;
-	in |= in >> 1;
-	in |= in >> 2;
-	in |= in >> 4;
-	in |= in >> 8;
-	in |= in >> 16;
-	in++;
-	return in;
 static void temp_legacy_finishupdate_draws()
@@ -100,8 +38,7 @@ static void temp_legacy_finishupdate_draws()
 		if (cv_ticrate.value)
-		if (cv_showping.value && netgame &&
-				( consoleplayer != serverplayer || ! server_lagless ))
+		if (cv_showping.value && netgame && (consoleplayer != serverplayer || !server_lagless))
 			if (server_lagless)
@@ -110,11 +47,8 @@ static void temp_legacy_finishupdate_draws()
-				for (
-						int player = 1;
-						player < MAXPLAYERS;
-						player++
-				){
+				for (int player = 1; player < MAXPLAYERS; player++)
+				{
 					if (D_IsPlayerHumanAndGaming(player))
@@ -142,149 +76,83 @@ static void temp_legacy_finishupdate_draws()
-void SoftwareBlitPass::prepass(Rhi& rhi)
+SoftwarePass::SoftwarePass() : Pass()
-	if (!pipeline_)
-	{
-		pipeline_ = rhi.create_pipeline(kPipelineDescription);
-	}
-	if (!quad_vbo_)
-	{
-		quad_vbo_ = rhi.create_buffer({sizeof(kVerts), BufferType::kVertexBuffer, BufferUsage::kImmutable});
-		quad_vbo_needs_upload_ = true;
-	}
+SoftwarePass::~SoftwarePass() = default;
-	if (!quad_ibo_)
+void SoftwarePass::prepass(Rhi& rhi)
+	if (rendermode != render_soft)
-		quad_ibo_ = rhi.create_buffer({sizeof(kIndices), BufferType::kIndexBuffer, BufferUsage::kImmutable});
-		quad_ibo_needs_upload_ = true;
+		return;
-	temp_legacy_finishupdate_draws();
+	// Render the player views... or not yet? Needs to be moved out of D_Display in d_main.c
+	// Assume it's already been done and vid.buffer contains the composited splitscreen view.
+	// In the future though, we will want to treat each player viewport separately for postprocessing.
-	uint32_t vid_width = static_cast<uint32_t>(vid.width);
-	uint32_t vid_height = static_cast<uint32_t>(vid.height);
+	temp_legacy_finishupdate_draws();
-	if (screen_tex_ && (screen_tex_width_ < vid_width || screen_tex_height_ < vid_height))
+	// Prepare RHI resources
+	if (screen_texture_ && (static_cast<int32_t>(width_) != vid.width || static_cast<int32_t>(height_) != vid.height))
-		rhi.destroy_texture(screen_tex_);
-		screen_tex_ = kNullHandle;
+		// Mode changed, recreate texture
+		rhi.destroy_texture(screen_texture_);
+		screen_texture_ = kNullHandle;
-	if (!screen_tex_)
-	{
-		screen_tex_width_ = next_pow_of_2(vid_width);
-		screen_tex_height_ = next_pow_of_2(vid_height);
-		screen_tex_ = rhi.create_texture({TextureFormat::kLuminance, screen_tex_width_, screen_tex_height_});
-	}
+	width_ = vid.width;
+	height_ = vid.height;
-	if (!palette_tex_)
+	if (!screen_texture_)
-		palette_tex_ = rhi.create_texture({TextureFormat::kRGBA, 256, 1});
+		screen_texture_ = rhi.create_texture({TextureFormat::kLuminance, width_, height_});
-void SoftwareBlitPass::upload_screen(Rhi& rhi, Handle<TransferContext> ctx)
-	rhi::Rect screen_rect = {
-		0,
-		0,
-		static_cast<uint32_t>(vid.width),
-		static_cast<uint32_t>(vid.height)
-	};
-	tcb::span<uint8_t> screen_span = tcb::span(vid.buffer, static_cast<size_t>(vid.width * vid.height));
-	rhi.update_texture(ctx, screen_tex_, screen_rect, rhi::PixelFormat::kR8, tcb::as_bytes(screen_span));
-void SoftwareBlitPass::upload_palette(Rhi& rhi, Handle<TransferContext> ctx)
-	// Unfortunately, pMasterPalette must be swizzled to get a linear layout.
-	// Maybe some adjustments to palette storage can make this a straight upload.
-	std::array<byteColor_t, 256> palette_32;
-	for (size_t i = 0; i < 256; i++)
+	// If the screen width won't fit the unpack alignment, we need to copy the screen.
+	if (width_ % kPixelRowUnpackAlignment > 0)
-		palette_32[i] = pMasterPalette[i].s;
+		// Round the row width up to the next multiple of the unpack alignment. The previous expression masked
+		// with a logical NOT (`!kPixelRowUnpackAlignment`, which is 0), so padded_width was always 0 and the
+		// padding loop below ran on an underflowed count.
+		const std::size_t unpack_alignment = kPixelRowUnpackAlignment;
+		std::size_t padded_width = ((width_ + (unpack_alignment - 1)) / unpack_alignment) * unpack_alignment;
+		copy_buffer_.clear();
+		copy_buffer_.reserve(padded_width * height_);
+		for (std::size_t y = 0; y < height_; y++)
+		{
+			// Copy one row of screen pixels...
+			for (std::size_t x = 0; x < width_; x++)
+			{
+				copy_buffer_.push_back(vid.buffer[(width_ * y) + x]);
+			}
+			// Padding to unpack alignment
+			for (std::size_t i = 0; i < padded_width - width_; i++)
+			{
+				copy_buffer_.push_back(0);
+			}
+		}
-	rhi.update_texture(ctx, palette_tex_, {0, 0, 256, 1}, rhi::PixelFormat::kRGBA8, tcb::as_bytes(tcb::span(palette_32)));
-void SoftwareBlitPass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+void SoftwarePass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
-	if (quad_vbo_needs_upload_ && quad_vbo_)
+	// Upload screen
+	tcb::span<const std::byte> screen_span;
+	if (width_ % kPixelRowUnpackAlignment > 0)
-		rhi.update_buffer_contents(ctx, quad_vbo_, 0, tcb::as_bytes(tcb::span(kVerts)));
-		quad_vbo_needs_upload_ = false;
+		screen_span = tcb::as_bytes(tcb::span(copy_buffer_));
-	if (quad_ibo_needs_upload_ && quad_ibo_)
+	else
-		rhi.update_buffer_contents(ctx, quad_ibo_, 0, tcb::as_bytes(tcb::span(kIndices)));
-		quad_ibo_needs_upload_ = false;
+		screen_span = tcb::as_bytes(tcb::span(vid.buffer, width_ * height_));
-	upload_screen(rhi, ctx);
-	upload_palette(rhi, ctx);
-	// Calculate aspect ratio for black borders
-	float aspect = static_cast<float>(vid.width) / static_cast<float>(vid.height);
-	float real_aspect = static_cast<float>(vid.realwidth) / static_cast<float>(vid.realheight);
-	bool taller = aspect > real_aspect;
-	std::array<rhi::UniformVariant, 1> g1_uniforms = {{
-		// Projection
-		std::array<std::array<float, 4>, 4> {{
-			{taller ? 1.f : 1.f / real_aspect, 0.f, 0.f, 0.f},
-			{0.f, taller ? -1.f / (1.f / real_aspect) : -1.f, 0.f, 0.f},
-			{0.f, 0.f, 1.f, 0.f},
-			{0.f, 0.f, 0.f, 1.f}
-		}},
-	}};
-	std::array<rhi::UniformVariant, 2> g2_uniforms =
-	{{
-		// ModelView
-		std::array<std::array<float, 4>, 4>
-		{{
-			{taller ? 2.f : 2.f * aspect, 0.f, 0.f, 0.f},
-			{0.f, taller ? 2.f * (1.f / aspect) : 2.f, 0.f, 0.f},
-			{0.f, 0.f, 1.f, 0.f},
-			{0.f, 0.f, 0.f, 1.f}
-		}},
-		// Texcoord0 Transform
-		std::array<std::array<float, 3>, 3>
-		{{
-			{vid.width / static_cast<float>(screen_tex_width_), 0.f, 0.f},
-			{0.f, vid.height / static_cast<float>(screen_tex_height_), 0.f},
-			{0.f, 0.f, 1.f}
-		}}
-	}};
-	uniform_sets_[0] = rhi.create_uniform_set(ctx, {g1_uniforms});
-	uniform_sets_[1] = rhi.create_uniform_set(ctx, {g2_uniforms});
-	std::array<rhi::VertexAttributeBufferBinding, 1> vbs = {{{0, quad_vbo_}}};
-	std::array<rhi::TextureBinding, 2> tbs = {{
-		{rhi::SamplerName::kSampler0, screen_tex_},
-		{rhi::SamplerName::kSampler1, palette_tex_}
-	}};
-	binding_set_ = rhi.create_binding_set(ctx, pipeline_, {vbs, tbs});
+	rhi.update_texture(ctx, screen_texture_, {0, 0, width_, height_}, PixelFormat::kR8, screen_span);
-void SoftwareBlitPass::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+void SoftwarePass::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
-	rhi.begin_default_render_pass(ctx, true);
-	rhi.bind_pipeline(ctx, pipeline_);
-	rhi.bind_uniform_set(ctx, 0, uniform_sets_[0]);
-	rhi.bind_uniform_set(ctx, 1, uniform_sets_[1]);
-	rhi.bind_binding_set(ctx, binding_set_);
-	rhi.bind_index_buffer(ctx, quad_ibo_);
-	rhi.draw_indexed(ctx, 6, 0);
-	rhi.end_render_pass(ctx);
-void SoftwareBlitPass::postpass(Rhi& rhi)
+void SoftwarePass::postpass(Rhi& rhi)
-	// no-op
diff --git a/src/hwr2/pass_software.hpp b/src/hwr2/pass_software.hpp
index f36c82973a2b5a4dd686c0b9cc4fe622a6f9096c..4e7b02405f561afb54da23f8012b689f91d17bc8 100644
--- a/src/hwr2/pass_software.hpp
+++ b/src/hwr2/pass_software.hpp
@@ -1,44 +1,46 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
-#include <array>
+#include <cstddef>
+#include <vector>
-#include "../rhi/rhi.hpp"
 #include "pass.hpp"
 namespace srb2::hwr2
-class SoftwareBlitPass : public Pass
+/// @brief Renders software player views in prepass and uploads the result to a texture in transfer.
+class SoftwarePass final : public Pass
-	rhi::Handle<rhi::Pipeline> pipeline_;
-	rhi::Handle<rhi::Texture> screen_tex_;
-	rhi::Handle<rhi::Texture> palette_tex_;
-	rhi::Handle<rhi::Buffer> quad_vbo_;
-	rhi::Handle<rhi::Buffer> quad_ibo_;
-	std::array<rhi::Handle<rhi::UniformSet>, 2> uniform_sets_;
-	rhi::Handle<rhi::BindingSet> binding_set_;
-	uint32_t screen_tex_width_ = 0;
-	uint32_t screen_tex_height_ = 0;
-	bool quad_vbo_needs_upload_ = false;
-	bool quad_ibo_needs_upload_ = false;
-	void upload_screen(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx);
-	void upload_palette(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx);
+	rhi::Handle<rhi::Texture> screen_texture_;
+	uint32_t width_ = 0;
+	uint32_t height_ = 0;
+	// Used to ensure the row spans are aligned on the unpack boundary for weird resolutions
+	// Any resolution with a width divisible by 4 doesn't need this, but e.g. 1366x768 needs the intermediary copy
+	std::vector<uint8_t> copy_buffer_;
-	virtual ~SoftwareBlitPass();
+	SoftwarePass();
+	virtual ~SoftwarePass();
 	virtual void prepass(rhi::Rhi& rhi) override;
 	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
 	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
 	virtual void postpass(rhi::Rhi& rhi) override;
+	rhi::Handle<rhi::Texture> screen_texture() const noexcept { return screen_texture_; }
 } // namespace srb2::hwr2
-#endif // __SRB2_HWR2_PASS_SOFTWARE_HPP__
diff --git a/src/hwr2/pass_twodee.cpp b/src/hwr2/pass_twodee.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..469fdcd2aff00b28d0020788117f07d3ed5ccde7
--- /dev/null
+++ b/src/hwr2/pass_twodee.cpp
@@ -0,0 +1,954 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "pass_twodee.hpp"
+#include <unordered_set>
+#include <stb_rect_pack.h>
+#include "../r_patch.h"
+#include "../v_video.h"
+#include "../z_zone.h"
+using namespace srb2;
+using namespace srb2::hwr2;
+using namespace srb2::rhi;
+/// @brief Where one trimmed patch lives inside an atlas page, plus what is needed to map the untrimmed
+/// patch's coordinates onto it. Filled in by pack_patches.
+struct AtlasEntry
+	uint32_t x; ///< Left edge of the packed rect within the atlas (from stbrp_rect::x)
+	uint32_t y; ///< Top edge of the packed rect within the atlas (from stbrp_rect::y)
+	uint32_t w; ///< Width of the packed (trimmed) rect
+	uint32_t h; ///< Height of the packed (trimmed) rect
+	uint32_t trim_x; ///< X offset of the trimmed rect inside the original patch
+	uint32_t trim_y; ///< Y offset of the trimmed rect inside the original patch
+	uint32_t orig_w; ///< patch->width of the untrimmed patch
+	uint32_t orig_h; ///< patch->height of the untrimmed patch
+/// @brief One atlas page: a texture, the patches placed in it, and the stb_rect_pack state used to place them.
+///
+/// Only move construction/assignment are declared (and the unique_ptr members are not copyable), so an Atlas
+/// can be moved but not copied.
+struct Atlas
+	Atlas() = default;
+	Atlas(Atlas&&) = default;
+	Handle<Texture> tex; ///< Texture backing this page (created in create_atlas)
+	uint32_t tex_width; ///< Width of tex in texels; set by create_atlas
+	uint32_t tex_height; ///< Height of tex in texels; set by create_atlas
+	/// Placement of every patch packed into this page, keyed by patch pointer
+	std::unordered_map<const patch_t*, AtlasEntry> entries;
+	/// stb_rect_pack context for this page; it is initialized against rp_nodes by stbrp_init_target
+	std::unique_ptr<stbrp_context> rp_ctx {nullptr};
+	/// Node storage handed to stbrp_init_target alongside rp_ctx
+	std::unique_ptr<stbrp_node[]> rp_nodes {nullptr};
+	Atlas& operator=(Atlas&&) = default;
+} // namespace
+/// @brief Definition of the shared 2D pass state: textures, atlases, colormaps and pipelines.
+/// Instances are created through make_twodee_pass_data and held by shared_ptr.
+struct srb2::hwr2::TwodeePassData
+	/// 2x1 luminance-alpha texture bound to sampler 0 when a merged command has no texture
+	Handle<Texture> default_tex;
+	/// 256x1 RGBA texture refilled from pMasterPalette on every transfer
+	Handle<Texture> palette_tex;
+	/// 256x1 identity colormap bound when a merged command has no colormap
+	Handle<Texture> default_colormap_tex;
+	/// Atlas pages; patch_lookup values index into this vector
+	std::vector<Atlas> patch_atlases;
+	/// Maps a patch to the index of the atlas page holding it
+	std::unordered_map<const patch_t*, size_t> patch_lookup;
+	/// Patches packed during prepass whose pixels still need uploading; drained in transfer
+	std::vector<const patch_t*> patches_to_upload;
+	/// One 256x1 luminance texture per colormap pointer seen in a command list
+	std::unordered_map<const uint8_t*, Handle<Texture>> colormaps;
+	/// Colormaps referenced this frame; uploaded and cleared in transfer
+	std::vector<const uint8_t*> colormaps_to_upload;
+	/// Pipelines keyed by blend mode and primitive type; populated on the first prepass
+	std::unordered_map<TwodeePipelineKey, Handle<Pipeline>> pipelines;
+	/// Set when default_tex / default_colormap_tex are (re)created and need their contents uploaded
+	bool upload_default_tex = false;
+/// @brief Create a default-constructed TwodeePassData owned by a shared_ptr.
+std::shared_ptr<TwodeePassData> srb2::hwr2::make_twodee_pass_data()
+	return std::make_shared<TwodeePassData>();
+// Constructor only invokes the Pass base constructor; the destructor is defaulted here, out of line.
+TwodeePass::TwodeePass() : Pass()
+TwodeePass::~TwodeePass() = default;
+// Vertex/index buffer sizes (in bytes) are rounded up to a multiple of these values in prepass.
+static constexpr const uint32_t kVboInitSize = 32768;
+static constexpr const uint32_t kIboInitSize = 4096;
+// Forward declaration; defined further down, used by pack_patches.
+static Rect trimmed_patch_dim(const patch_t* patch);
+/// @brief Append a new, empty atlas page (texture + rect packer state) to the shared pass data.
+/// @param rhi       used to create the page's luminance-alpha texture
+/// @param pass_data receives the new page at the back of patch_atlases
+static void create_atlas(Rhi& rhi, TwodeePassData& pass_data)
+	// Page dimensions and rect packer node capacity. The texture, the recorded dimensions and the packer
+	// target must all agree, so they are derived from a single pair of constants.
+	constexpr const uint32_t kAtlasSize = 2048;
+	constexpr const size_t kAtlasNodeCount = 4096;
+	Atlas new_atlas;
+	new_atlas.tex = rhi.create_texture({TextureFormat::kLuminanceAlpha, kAtlasSize, kAtlasSize});
+	new_atlas.tex_width = kAtlasSize;
+	new_atlas.tex_height = kAtlasSize;
+	new_atlas.rp_ctx = std::make_unique<stbrp_context>();
+	// make_unique<T[]>(n) value-initializes every element, so the nodes start out zeroed without an extra loop.
+	new_atlas.rp_nodes = std::make_unique<stbrp_node[]>(kAtlasNodeCount);
+	stbrp_init_target(
+		new_atlas.rp_ctx.get(),
+		static_cast<int>(kAtlasSize),
+		static_cast<int>(kAtlasSize),
+		new_atlas.rp_nodes.get(),
+		static_cast<int>(kAtlasNodeCount)
+	);
+	// it is CRITICALLY important that the atlas is MOVED, not COPIED, otherwise the node ptrs will be broken
+	pass_data.patch_atlases.push_back(std::move(new_atlas));
+/// @brief Assign each patch in `patches` a rect in some atlas page, creating pages as needed.
+///
+/// For every patch that gets packed, an AtlasEntry is stored in the owning atlas, patch_lookup is pointed
+/// at that atlas, and the patch is queued in patches_to_upload.
+/// @param rhi       used to create additional atlas pages
+/// @param pass_data shared atlas state to update
+/// @param patches   patches that are not yet present in any atlas
+static void pack_patches(Rhi& rhi, TwodeePassData& pass_data, tcb::span<const patch_t*> patches)
+	// Prepare stbrp rects for patches to be loaded.
+	std::vector<stbrp_rect> rects;
+	rects.reserve(patches.size());
+	for (size_t i = 0; i < patches.size(); i++)
+	{
+		const patch_t* patch = patches[i];
+		Rect trimmed_rect = trimmed_patch_dim(patch);
+		stbrp_rect rect {};
+		// The id is the index into `patches`; it is how the patch is found again after packing.
+		rect.id = static_cast<int>(i);
+		rect.w = trimmed_rect.w;
+		rect.h = trimmed_rect.h;
+		rects.push_back(rect);
+	}
+	while (rects.size() > 0)
+	{
+		if (pass_data.patch_atlases.size() == 0)
+		{
+			create_atlas(rhi, pass_data);
+		}
+		for (size_t atlas_index = 0; atlas_index < pass_data.patch_atlases.size(); atlas_index++)
+		{
+			auto& atlas = pass_data.patch_atlases[atlas_index];
+			stbrp_pack_rects(atlas.rp_ctx.get(), rects.data(), rects.size());
+			for (auto itr = rects.begin(); itr != rects.end();)
+			{
+				auto& rect = *itr;
+				if (rect.was_packed)
+				{
+					AtlasEntry entry;
+					const patch_t* patch = patches[rect.id];
+					// TODO prevent unnecessary recalculation of trim?
+					Rect trimmed_rect = trimmed_patch_dim(patch);
+					entry.x = static_cast<uint32_t>(rect.x);
+					entry.y = static_cast<uint32_t>(rect.y);
+					entry.w = static_cast<uint32_t>(rect.w);
+					entry.h = static_cast<uint32_t>(rect.h);
+					entry.trim_x = static_cast<uint32_t>(trimmed_rect.x);
+					entry.trim_y = static_cast<uint32_t>(trimmed_rect.y);
+					entry.orig_w = static_cast<uint32_t>(patch->width);
+					entry.orig_h = static_cast<uint32_t>(patch->height);
+					atlas.entries.insert_or_assign(patch, std::move(entry));
+					pass_data.patch_lookup.insert_or_assign(patch, atlas_index);
+					pass_data.patches_to_upload.push_back(patch);
+					// erase() invalidates itr (and `rect`); continue from the iterator it returns.
+					itr = rects.erase(itr);
+					continue;
+				}
+				++itr;
+			}
+			// If we still have rects to pack, and we're at the last atlas, create another atlas.
+			// TODO This could end up in an infinite loop if the patches are bigger than an atlas. Such patches need to
+			// be loaded as individual RHI textures instead.
+			// Note: create_atlas may reallocate patch_atlases, so `atlas` must not be used past this point.
+			if (atlas_index == pass_data.patch_atlases.size() - 1 && rects.size() > 0)
+			{
+				create_atlas(rhi, pass_data);
+			}
+		}
+	}
+/// @brief Derive the subrect of the given patch with empty columns and rows excluded.
+/// @param patch patch whose column data is scanned
+/// @return Rect {minx, miny, width, height} in patch pixel coordinates; width/height are clamped so they
+/// never go negative (a patch with no pixels yields a zero-sized rect).
+static Rect trimmed_patch_dim(const patch_t* patch)
+	bool minx_found = false;
+	int32_t minx = 0;
+	int32_t maxx = 0;
+	int32_t miny = patch->height;
+	int32_t maxy = 0;
+	for (int32_t x = 0; x < patch->width; x++)
+	{
+		const int32_t columnofs = patch->columnofs[x];
+		const column_t* column = reinterpret_cast<const column_t*>(patch->columns + columnofs);
+		// If the first pole is empty (topdelta = 255), there are no pixels in this column
+		if (!minx_found && column->topdelta == 0xFF)
+		{
+			// Thus, the minx is at least one higher than the current column.
+			minx = x + 1;
+			continue;
+		}
+		minx_found = true;
+		// maxx tracks the last column that has pixels, so trailing empty columns are trimmed too.
+		if (minx_found && column->topdelta != 0xFF)
+		{
+			maxx = x;
+		}
+		// Only the first post's topdelta is considered for the top edge.
+		// NOTE(review): for an empty column after the first non-empty one this feeds 0xFF into the min;
+		// that only matters if patch->height exceeds 255 -- confirm whether that case can occur.
+		miny = std::min(static_cast<int32_t>(column->topdelta), miny);
+		int32_t prevdelta = 0;
+		int32_t topdelta = 0;
+		// Walk every post in the column until the 0xFF terminator.
+		while (column->topdelta != 0xFF)
+		{
+			topdelta = column->topdelta;
+			// Tall patches hack
+			if (topdelta <= prevdelta)
+			{
+				topdelta += prevdelta;
+			}
+			prevdelta = topdelta;
+			maxy = std::max(topdelta + column->length, maxy);
+			// Advance to the next post: `length` pixel bytes plus 4 further bytes per post.
+			column = reinterpret_cast<const column_t*>(reinterpret_cast<const uint8_t*>(column) + column->length + 4);
+		}
+	}
+	// Convert maxx from "last column with pixels" to an exclusive bound.
+	maxx += 1;
+	maxx = std::max(minx, maxx);
+	maxy = std::max(miny, maxy);
+	return {minx, miny, static_cast<uint32_t>(maxx - minx), static_cast<uint32_t>(maxy - miny)};
+/// @brief Expand a patch's column/post data into a row-major, two-bytes-per-pixel image of its trimmed rect.
+/// @param patch patch to convert
+/// @param out   cleared and resized to (padded width) * height * 2 bytes; byte 0 of each pixel is the palette
+///              index, byte 1 is 0xFF where the patch has a pixel and 0 elsewhere
+static void convert_patch_to_trimmed_rg8_pixels(const patch_t* patch, std::vector<uint8_t>& out)
+	Rect trimmed_rect = trimmed_patch_dim(patch);
+	if (trimmed_rect.w % 2 > 0)
+	{
+		// In order to force 4-byte row alignment, an extra column is added to the image data.
+		// Look up GL_UNPACK_ALIGNMENT (which defaults to 4 bytes)
+		trimmed_rect.w += 1;
+	}
+	out.clear();
+	// 2 bytes per pixel; 1 for the color index, 1 for the alpha. (RG8)
+	out.resize(trimmed_rect.w * trimmed_rect.h * 2, 0);
+	// The second loop condition keeps the padding column (if any) from reading past the patch's last column.
+	for (int32_t x = 0; x < static_cast<int32_t>(trimmed_rect.w) && x < (patch->width - trimmed_rect.x); x++)
+	{
+		const int32_t columnofs = patch->columnofs[x + trimmed_rect.x];
+		const column_t* column = reinterpret_cast<const column_t*>(patch->columns + columnofs);
+		int32_t prevdelta = 0;
+		int32_t topdelta = 0;
+		while (column->topdelta != 0xFF)
+		{
+			topdelta = column->topdelta;
+			// prevdelta is used to implement tall patches hack
+			if (topdelta <= prevdelta)
+			{
+				topdelta += prevdelta;
+			}
+			prevdelta = topdelta;
+			// NOTE(review): presumably the post layout is topdelta, length, one pad byte, pixels, one pad byte;
+			// that would explain the +3 here and the length + 4 stride below -- confirm against column_t.
+			const uint8_t* source = reinterpret_cast<const uint8_t*>(column) + 3;
+			int32_t count = column->length; // is this byte order safe...?
+			for (int32_t i = 0; i < count; i++)
+			{
+				// Row in the trimmed image; rows outside the trimmed rect are skipped.
+				int32_t output_y = topdelta + i - trimmed_rect.y;
+				if (output_y < 0)
+				{
+					continue;
+				}
+				if (output_y >= static_cast<int32_t>(trimmed_rect.h))
+				{
+					break;
+				}
+				size_t pixel_index = (output_y * trimmed_rect.w + x) * 2;
+				out[pixel_index + 0] = source[i]; // index in luminance/red channel
+				out[pixel_index + 1] = 0xFF;	  // alpha/green value of 1
+			}
+			column = reinterpret_cast<const column_t*>(reinterpret_cast<const uint8_t*>(column) + column->length + 4);
+		}
+	}
+/// @brief Build the pipeline cache key for a draw command from its blend mode and whether it draws lines.
+static TwodeePipelineKey pipeline_key_for_cmd(const Draw2dCmd& cmd)
+	return {hwr2::get_blend_mode(cmd), hwr2::is_draw_lines(cmd)};
+/// @brief Describe the pipeline for a given blend mode / primitive type combination.
+/// @param key blend mode and lines-vs-triangles selector
+/// @return descriptor using the unshaded paletted program, the TwodeeVertex layout and an RGBA8 target
+static PipelineDesc make_pipeline_desc(TwodeePipelineKey key)
+	// One interleaved buffer of TwodeeVertex: position at byte 0, texcoord at byte 12, color at byte 20.
+	constexpr const VertexInputDesc kTwodeeVertexInput = {
+		{{sizeof(TwodeeVertex)}},
+		{{VertexAttributeName::kPosition, 0, 0},
+		 {VertexAttributeName::kTexCoord0, 0, 12},
+		 {VertexAttributeName::kColor, 0, 20}}};
+	BlendDesc blend_desc;
+	// Every mode except kInvertDest weights the source color by its alpha and uses the same alpha equation;
+	// they differ only in the destination color factor and the color function.
+	const auto source_alpha_blend = [&blend_desc](BlendFactor dest_color, BlendFunction color_fn)
+	{
+		blend_desc.source_factor_color = BlendFactor::kSourceAlpha;
+		blend_desc.dest_factor_color = dest_color;
+		blend_desc.color_function = color_fn;
+		blend_desc.source_factor_alpha = BlendFactor::kOne;
+		blend_desc.dest_factor_alpha = BlendFactor::kOneMinusSourceAlpha;
+		blend_desc.alpha_function = BlendFunction::kAdd;
+	};
+	switch (key.blend)
+	{
+	case Draw2dBlend::kModulate:
+		source_alpha_blend(BlendFactor::kOneMinusSourceAlpha, BlendFunction::kAdd);
+		break;
+	case Draw2dBlend::kAdditive:
+		source_alpha_blend(BlendFactor::kOne, BlendFunction::kAdd);
+		break;
+	case Draw2dBlend::kSubtractive:
+		source_alpha_blend(BlendFactor::kOne, BlendFunction::kSubtract);
+		break;
+	case Draw2dBlend::kReverseSubtractive:
+		source_alpha_blend(BlendFactor::kOne, BlendFunction::kReverseSubtract);
+		break;
+	case Draw2dBlend::kInvertDest:
+		// Color: source minus destination with unit factors; alpha: destination alpha is kept as-is.
+		blend_desc.source_factor_color = BlendFactor::kOne;
+		blend_desc.dest_factor_color = BlendFactor::kOne;
+		blend_desc.color_function = BlendFunction::kSubtract;
+		blend_desc.source_factor_alpha = BlendFactor::kZero;
+		blend_desc.dest_factor_alpha = BlendFactor::kDestAlpha;
+		blend_desc.alpha_function = BlendFunction::kAdd;
+		break;
+	}
+	const PrimitiveType primitive = key.lines ? PrimitiveType::kLines : PrimitiveType::kTriangles;
+	return {
+		PipelineProgram::kUnshadedPaletted,
+		kTwodeeVertexInput,
+		{{{{UniformName::kProjection}},
+		  {{UniformName::kModelView, UniformName::kTexCoord0Transform, UniformName::kSampler0IsIndexedAlpha}}}},
+		{{SamplerName::kSampler0, SamplerName::kSampler1, SamplerName::kSampler2}},
+		std::nullopt,
+		{PixelFormat::kRGBA8, blend_desc, {true, true, true, true}},
+		primitive,
+		CullMode::kNone,
+		FaceWinding::kCounterClockwise,
+		{0.f, 0.f, 0.f, 1.f}};
+/// @brief Overwrite the four vertices of a patch quad so they cover only the trimmed, clipped part of the
+/// patch and sample the matching region of its atlas page.
+/// @param list command list whose `vertices` are rewritten in place, starting at cmd.begin_index
+/// @param cmd  the patch quad command; does nothing when cmd.patch is null
+/// @param data shared pass data holding the patch's atlas entry
+static void rewrite_patch_quad_vertices(Draw2dList& list, const Draw2dPatchQuad& cmd, TwodeePassData* data)
+	// Patch quads are clipped according to the patch's atlas entry
+	if (cmd.patch == nullptr)
+	{
+		return;
+	}
+	// NOTE: operator[] is used for both lookups, so a patch that was never packed yields atlas 0 and a
+	// value-initialized (all zero) entry rather than an error.
+	std::size_t atlas_index = data->patch_lookup[cmd.patch];
+	auto& atlas = data->patch_atlases[atlas_index];
+	auto& entry = atlas.entries[cmd.patch];
+	// Rewrite the vertex data completely.
+	// The UVs of the trimmed patch in atlas UV space.
+	const float atlas_umin = static_cast<float>(entry.x) / atlas.tex_width;
+	const float atlas_umax = static_cast<float>(entry.x + entry.w) / atlas.tex_width;
+	const float atlas_vmin = static_cast<float>(entry.y) / atlas.tex_height;
+	const float atlas_vmax = static_cast<float>(entry.y + entry.h) / atlas.tex_height;
+	// The UVs of the trimmed patch in untrimmed UV space.
+	// The command's UVs are in untrimmed UV space.
+	const float trim_umin = static_cast<float>(entry.trim_x) / entry.orig_w;
+	const float trim_umax = static_cast<float>(entry.trim_x + entry.w) / entry.orig_w;
+	const float trim_vmin = static_cast<float>(entry.trim_y) / entry.orig_h;
+	const float trim_vmax = static_cast<float>(entry.trim_y + entry.h) / entry.orig_h;
+	// Calculate positions
+	const float cmd_xrange = cmd.xmax - cmd.xmin;
+	const float cmd_yrange = cmd.ymax - cmd.ymin;
+	// The command's rect limited to its clip rect (when clipping is enabled).
+	const float clipped_xmin = cmd.clip ? std::clamp(cmd.xmin, cmd.clip_xmin, cmd.clip_xmax) : cmd.xmin;
+	const float clipped_xmax = cmd.clip ? std::clamp(cmd.xmax, cmd.clip_xmin, cmd.clip_xmax) : cmd.xmax;
+	const float clipped_ymin = cmd.clip ? std::clamp(cmd.ymin, cmd.clip_ymin, cmd.clip_ymax) : cmd.ymin;
+	const float clipped_ymax = cmd.clip ? std::clamp(cmd.ymax, cmd.clip_ymin, cmd.clip_ymax) : cmd.ymax;
+	// The command's rect shrunk to the part of the patch that survived trimming.
+	const float trimmed_xmin = cmd.xmin + trim_umin * cmd_xrange;
+	const float trimmed_xmax = cmd.xmax - (1.f - trim_umax) * cmd_xrange;
+	const float trimmed_ymin = cmd.ymin + trim_vmin * cmd_yrange;
+	const float trimmed_ymax = cmd.ymax - (1.f - trim_vmax) * cmd_yrange;
+	const float trimmed_xrange = trimmed_xmax - trimmed_xmin;
+	const float trimmed_yrange = trimmed_ymax - trimmed_ymin;
+	// Intersection of the clipped and trimmed rects.
+	float clipped_trimmed_xmin = std::max(clipped_xmin, trimmed_xmin);
+	float clipped_trimmed_xmax = std::min(clipped_xmax, trimmed_xmax);
+	float clipped_trimmed_ymin = std::max(clipped_ymin, trimmed_ymin);
+	float clipped_trimmed_ymax = std::min(clipped_ymax, trimmed_ymax);
+	// Keep min <= max so an empty intersection collapses to a zero-sized quad.
+	clipped_trimmed_xmin = std::min(clipped_trimmed_xmin, clipped_trimmed_xmax);
+	clipped_trimmed_ymin = std::min(clipped_trimmed_ymin, clipped_trimmed_ymax);
+	// Calculate UVs
+	// Start from trimmed dimensions as 0..1 and clip UVs based on that
+	// UVs in trimmed UV space (if clipped_xmin = trimmed_xmin, it'll be 0)
+	// NOTE(review): these divide by trimmed_xrange / trimmed_yrange, which is zero for a patch whose
+	// trimmed rect is empty -- confirm such patches never reach this point.
+	float clipped_umin;
+	float clipped_umax;
+	float clipped_vmin;
+	float clipped_vmax;
+	if (cmd.flip)
+	{
+		clipped_umin = std::max(0.f, 1.f - (clipped_trimmed_xmin - trimmed_xmin) / trimmed_xrange);
+		clipped_umax = std::min(1.f, (trimmed_xmax - clipped_trimmed_xmax) / trimmed_xrange);
+	}
+	else
+	{
+		clipped_umin = std::min(1.f, (clipped_trimmed_xmin - trimmed_xmin) / trimmed_xrange);
+		clipped_umax = std::max(0.f, 1.f - (trimmed_xmax - clipped_trimmed_xmax) / trimmed_xrange);
+	}
+	if (cmd.vflip)
+	{
+		clipped_vmin = std::max(0.f, 1.f - (clipped_trimmed_ymin - trimmed_ymin) / trimmed_yrange);
+		clipped_vmax = std::min(1.f, (trimmed_ymax - clipped_trimmed_ymax) / trimmed_yrange);
+	}
+	else
+	{
+		clipped_vmin = std::min(1.f, 0.f + (clipped_trimmed_ymin - trimmed_ymin) / trimmed_yrange);
+		clipped_vmax = std::max(0.f, 1.f - (trimmed_ymax - clipped_trimmed_ymax) / trimmed_yrange);
+	}
+	// convert from trimmed UV space to atlas space
+	clipped_umin = (atlas_umax - atlas_umin) * clipped_umin + atlas_umin;
+	clipped_umax = (atlas_umax - atlas_umin) * clipped_umax + atlas_umin;
+	clipped_vmin = (atlas_vmax - atlas_vmin) * clipped_vmin + atlas_vmin;
+	clipped_vmax = (atlas_vmax - atlas_vmin) * clipped_vmax + atlas_vmin;
+	std::size_t vtx_offs = cmd.begin_index;
+	// Vertex order is always min/min, max/min, max/max, min/max
+	list.vertices[vtx_offs + 0].x = clipped_trimmed_xmin;
+	list.vertices[vtx_offs + 0].y = clipped_trimmed_ymin;
+	list.vertices[vtx_offs + 0].u = clipped_umin;
+	list.vertices[vtx_offs + 0].v = clipped_vmin;
+	list.vertices[vtx_offs + 1].x = clipped_trimmed_xmax;
+	list.vertices[vtx_offs + 1].y = clipped_trimmed_ymin;
+	list.vertices[vtx_offs + 1].u = clipped_umax;
+	list.vertices[vtx_offs + 1].v = clipped_vmin;
+	list.vertices[vtx_offs + 2].x = clipped_trimmed_xmax;
+	list.vertices[vtx_offs + 2].y = clipped_trimmed_ymax;
+	list.vertices[vtx_offs + 2].u = clipped_umax;
+	list.vertices[vtx_offs + 2].v = clipped_vmax;
+	list.vertices[vtx_offs + 3].x = clipped_trimmed_xmin;
+	list.vertices[vtx_offs + 3].y = clipped_trimmed_ymax;
+	list.vertices[vtx_offs + 3].u = clipped_umin;
+	list.vertices[vtx_offs + 3].v = clipped_vmax;
+/// @brief Prepare everything the later stages need for this frame's 2D command lists.
+///
+/// Lazily creates the shared pipelines, default textures and render pass; rebuilds the atlases when
+/// requested; packs any patches not yet in an atlas; sizes the per-list vertex/index buffers; and merges
+/// consecutive draw commands that share a pipeline, texture and colormap into MergedTwodeeCommands.
+/// Does nothing when no context or shared data is attached.
+void TwodeePass::prepass(Rhi& rhi)
+	if (!ctx_ || !data_)
+	{
+		return;
+	}
+	if (data_->pipelines.size() == 0)
+	{
+		// One pipeline per (blend mode, primitive type) pair. Each pipeline is built from the very key it is
+		// stored under, so the cached pipeline can never disagree with its key.
+		constexpr const Draw2dBlend kBlendModes[] = {
+			Draw2dBlend::kModulate,
+			Draw2dBlend::kAdditive,
+			Draw2dBlend::kSubtractive,
+			Draw2dBlend::kReverseSubtractive,
+			Draw2dBlend::kInvertDest};
+		constexpr const bool kLineModes[] = {false, true};
+		for (const bool lines : kLineModes)
+		{
+			for (const Draw2dBlend blend : kBlendModes)
+			{
+				const TwodeePipelineKey key = {blend, lines};
+				data_->pipelines.insert({key, rhi.create_pipeline(make_pipeline_desc(key))});
+			}
+		}
+	}
+	if (!data_->default_tex)
+	{
+		data_->default_tex = rhi.create_texture({TextureFormat::kLuminanceAlpha, 2, 1});
+		data_->upload_default_tex = true;
+	}
+	if (!data_->palette_tex)
+	{
+		data_->palette_tex = rhi.create_texture({TextureFormat::kRGBA, 256, 1});
+	}
+	if (!data_->default_colormap_tex)
+	{
+		data_->default_colormap_tex = rhi.create_texture({TextureFormat::kLuminance, 256, 1});
+		data_->upload_default_tex = true;
+	}
+	if (!render_pass_)
+	{
+		render_pass_ = rhi.create_render_pass(
+			{std::nullopt, PixelFormat::kRGBA8, AttachmentLoadOp::kLoad, AttachmentStoreOp::kStore}
+		);
+	}
+	// Check for patches that are being freed after this frame. Those patches must be present in the atlases for this
+	// frame, but all atlases need to be cleared and rebuilt on next call to prepass.
+	// This is based on the assumption that patches are very rarely freed during runtime; occasionally repacking the
+	// atlases to free up space from patches that will never be referenced again is acceptable.
+	if (rebuild_atlases_)
+	{
+		for (auto& atlas : data_->patch_atlases)
+		{
+			rhi.destroy_texture(atlas.tex);
+		}
+		data_->patch_atlases.clear();
+		data_->patch_lookup.clear();
+		rebuild_atlases_ = false;
+	}
+	if (data_->patch_atlases.size() > 2)
+	{
+		// Rebuild the atlases next frame because we have too many patches in the atlas cache.
+		rebuild_atlases_ = true;
+	}
+	// Stage 1 - command list patch detection
+	std::unordered_set<const patch_t*> found_patches;
+	std::unordered_set<const uint8_t*> found_colormaps;
+	for (const auto& list : *ctx_)
+	{
+		for (const auto& cmd : list.cmds)
+		{
+			auto visitor = srb2::Overload {
+				[&](const Draw2dPatchQuad& cmd)
+				{
+					if (cmd.patch != nullptr)
+					{
+						found_patches.insert(cmd.patch);
+					}
+					if (cmd.colormap != nullptr)
+					{
+						found_colormaps.insert(cmd.colormap);
+					}
+				},
+				[&](const Draw2dVertices& cmd) {}};
+			std::visit(visitor, cmd);
+		}
+	}
+	// Only patches that are not in any atlas yet need packing.
+	std::unordered_set<const patch_t*> patch_cache_misses;
+	for (auto patch : found_patches)
+	{
+		if (data_->patch_lookup.find(patch) == data_->patch_lookup.end())
+		{
+			patch_cache_misses.insert(patch);
+		}
+	}
+	// Every colormap referenced this frame is queued for upload, whether or not its texture already existed.
+	for (auto colormap : found_colormaps)
+	{
+		if (data_->colormaps.find(colormap) == data_->colormaps.end())
+		{
+			Handle<Texture> colormap_tex = rhi.create_texture({TextureFormat::kLuminance, 256, 1});
+			data_->colormaps.insert({colormap, colormap_tex});
+		}
+		data_->colormaps_to_upload.push_back(colormap);
+	}
+	// Stage 2 - pack rects into atlases
+	std::vector<const patch_t*> patches_to_pack(patch_cache_misses.begin(), patch_cache_misses.end());
+	pack_patches(rhi, *data_, patches_to_pack);
+	// We now know what patches need to be uploaded.
+	size_t list_index = 0;
+	for (auto& list : *ctx_)
+	{
+		Handle<Buffer> vbo;
+		uint32_t vertex_data_size = tcb::as_bytes(tcb::span(list.vertices)).size();
+		// Round the required size up to a multiple of kVboInitSize (and never below it).
+		uint32_t needed_vbo_size = std::max(
+			kVboInitSize,
+			((static_cast<uint32_t>(vertex_data_size) + kVboInitSize - 1) / kVboInitSize) * kVboInitSize
+		);
+		// Get the existing buffer objects. Recreate them if they don't exist, or needs to be bigger.
+		if (list_index >= vbos_.size())
+		{
+			vbo = rhi.create_buffer({needed_vbo_size, BufferType::kVertexBuffer, BufferUsage::kDynamic});
+			vbos_.push_back({vbo, needed_vbo_size});
+		}
+		else
+		{
+			uint32_t existing_size = std::get<1>(vbos_[list_index]);
+			if (needed_vbo_size > existing_size)
+			{
+				rhi.destroy_buffer(std::get<0>(vbos_[list_index]));
+				vbo = rhi.create_buffer({needed_vbo_size, BufferType::kVertexBuffer, BufferUsage::kDynamic});
+				vbos_[list_index] = {vbo, needed_vbo_size};
+			}
+			vbo = std::get<0>(vbos_[list_index]);
+		}
+		Handle<Buffer> ibo;
+		uint32_t index_data_size = tcb::as_bytes(tcb::span(list.indices)).size();
+		uint32_t needed_ibo_size = std::max(
+			kIboInitSize,
+			((static_cast<uint32_t>(index_data_size) + kIboInitSize - 1) / kIboInitSize) * kIboInitSize
+		);
+		if (list_index >= ibos_.size())
+		{
+			ibo = rhi.create_buffer({needed_ibo_size, BufferType::kIndexBuffer, BufferUsage::kDynamic});
+			ibos_.push_back({ibo, needed_ibo_size});
+		}
+		else
+		{
+			uint32_t existing_size = std::get<1>(ibos_[list_index]);
+			if (needed_ibo_size > existing_size)
+			{
+				rhi.destroy_buffer(std::get<0>(ibos_[list_index]));
+				ibo = rhi.create_buffer({needed_ibo_size, BufferType::kIndexBuffer, BufferUsage::kDynamic});
+				ibos_[list_index] = {ibo, needed_ibo_size};
+			}
+			ibo = std::get<0>(ibos_[list_index]);
+		}
+		// Create a merged command list
+		MergedTwodeeCommandList merged_list;
+		merged_list.vbo = vbo;
+		merged_list.vbo_size = needed_vbo_size;
+		merged_list.ibo = ibo;
+		merged_list.ibo_size = needed_ibo_size;
+		// Seed the list with an empty, untextured command so there is always a "current" command to compare
+		// against; if the first real command differs, this seed stays at 0 elements.
+		MergedTwodeeCommand new_cmd;
+		new_cmd.index_offset = 0;
+		new_cmd.elements = 0;
+		new_cmd.colormap = nullptr;
+		// safety: a command list is required to have at least 1 command
+		new_cmd.pipeline_key = pipeline_key_for_cmd(list.cmds[0]);
+		merged_list.cmds.push_back(std::move(new_cmd));
+		for (auto& cmd : list.cmds)
+		{
+			auto& merged_cmd = *merged_list.cmds.rbegin();
+			bool new_cmd_needed = false;
+			TwodeePipelineKey pk = pipeline_key_for_cmd(cmd);
+			new_cmd_needed = new_cmd_needed || (pk != merged_cmd.pipeline_key);
+			// We need to split the merged commands based on the kind of texture
+			// Patches are converted to atlas texture indexes, which we've just packed the patch rects for
+			// Flats are uploaded as individual textures.
+			// TODO actually implement flat drawing
+			auto tex_visitor = srb2::Overload {
+				[&](const Draw2dPatchQuad& cmd)
+				{
+					if (cmd.patch == nullptr)
+					{
+						new_cmd_needed = new_cmd_needed || (merged_cmd.texture != std::nullopt);
+					}
+					else
+					{
+						size_t atlas_index = data_->patch_lookup[cmd.patch];
+						decltype(merged_cmd.texture) atlas_index_texture = atlas_index;
+						new_cmd_needed = new_cmd_needed || (merged_cmd.texture != atlas_index_texture);
+					}
+					new_cmd_needed = new_cmd_needed || (merged_cmd.colormap != cmd.colormap);
+				},
+				[&](const Draw2dVertices& cmd)
+				{
+					if (cmd.flat_lump == LUMPERROR)
+					{
+						new_cmd_needed |= (merged_cmd.texture != std::nullopt);
+					}
+					else
+					{
+						decltype(merged_cmd.texture) flat_tex = MergedTwodeeCommandFlatTexture {cmd.flat_lump};
+						new_cmd_needed |= (merged_cmd.texture != flat_tex);
+					}
+					new_cmd_needed = new_cmd_needed || (merged_cmd.colormap != nullptr);
+				}};
+			std::visit(tex_visitor, cmd);
+			if (new_cmd_needed)
+			{
+				MergedTwodeeCommand the_new_one;
+				the_new_one.index_offset = merged_cmd.index_offset + merged_cmd.elements;
+				// Map to the merged version of the texture variant. Yay...!
+				auto tex_visitor_again = srb2::Overload {
+					[&](const Draw2dPatchQuad& cmd)
+					{
+						if (cmd.patch != nullptr)
+						{
+							the_new_one.texture = data_->patch_lookup[cmd.patch];
+						}
+						else
+						{
+							the_new_one.texture = std::nullopt;
+						}
+						the_new_one.colormap = cmd.colormap;
+					},
+					[&](const Draw2dVertices& cmd)
+					{
+						if (cmd.flat_lump != LUMPERROR)
+						{
+							flat_manager_->find_or_create_indexed(rhi, cmd.flat_lump);
+							decltype(the_new_one.texture) t = MergedTwodeeCommandFlatTexture {cmd.flat_lump};
+							the_new_one.texture = t;
+						}
+						else
+						{
+							the_new_one.texture = std::nullopt;
+						}
+						the_new_one.colormap = nullptr;
+					}};
+				std::visit(tex_visitor_again, cmd);
+				the_new_one.pipeline_key = pipeline_key_for_cmd(cmd);
+				// push_back may reallocate; merged_cmd must not be used after this line.
+				merged_list.cmds.push_back(std::move(the_new_one));
+			}
+			// There may or may not be a new current command; update its element count
+			auto& new_merged_cmd = *merged_list.cmds.rbegin();
+			// We know for sure that all commands in a command list have a contiguous range of elements in the IBO
+			// So we can draw them in batch if the pipeline key and textures match
+			new_merged_cmd.elements += hwr2::elements(cmd);
+			// Perform coordinate transformations
+			{
+				auto vtx_transform_visitor = srb2::Overload {
+					[&](const Draw2dPatchQuad& cmd) { rewrite_patch_quad_vertices(list, cmd, data_.get()); },
+					[&](const Draw2dVertices& cmd) {}};
+				std::visit(vtx_transform_visitor, cmd);
+			}
+		}
+		cmd_lists_.push_back(std::move(merged_list));
+		list_index++;
+	}
+/// @brief Upload all data prepared in prepass: default textures, palette, colormaps, newly packed patches,
+/// per-list vertex/index data, and create the binding and uniform sets used by graphics().
+/// Does nothing when no context or shared data is attached.
+void TwodeePass::transfer(Rhi& rhi, Handle<TransferContext> ctx)
+	if (!ctx_ || !data_)
+	{
+		return;
+	}
+	if (data_->upload_default_tex)
+	{
+		// Two RG8 texels, each {index 0, alpha 255}.
+		std::array<uint8_t, 4> data = {0, 255, 0, 255};
+		rhi.update_texture(ctx, data_->default_tex, {0, 0, 2, 1}, PixelFormat::kRG8, tcb::as_bytes(tcb::span(data)));
+		// Identity colormap: entry i maps to i.
+		std::array<uint8_t, 256> colormap_data;
+		for (size_t i = 0; i < 256; i++)
+		{
+			colormap_data[i] = i;
+		}
+		rhi.update_texture(
+			ctx,
+			data_->default_colormap_tex,
+			{0, 0, 256, 1},
+			PixelFormat::kR8,
+			tcb::as_bytes(tcb::span(colormap_data))
+		);
+		data_->upload_default_tex = false;
+	}
+	{
+		// TODO share palette tex with software pass
+		// Unfortunately, pMasterPalette must be swizzled to get a linear layout.
+		// Maybe some adjustments to palette storage can make this a straight upload.
+		std::array<byteColor_t, 256> palette_32;
+		for (size_t i = 0; i < 256; i++)
+		{
+			palette_32[i] = pMasterPalette[i].s;
+		}
+		rhi.update_texture(
+			ctx,
+			data_->palette_tex,
+			{0, 0, 256, 1},
+			rhi::PixelFormat::kRGBA8,
+			tcb::as_bytes(tcb::span(palette_32))
+		);
+	}
+	// Each queued colormap is uploaded as 256 single-byte entries read straight from the colormap pointer.
+	for (auto colormap : data_->colormaps_to_upload)
+	{
+		rhi.update_texture(
+			ctx,
+			data_->colormaps[colormap],
+			{0, 0, 256, 1},
+			rhi::PixelFormat::kR8,
+			tcb::as_bytes(tcb::span(colormap, 256))
+		);
+	}
+	data_->colormaps_to_upload.clear();
+	// Convert patches to RG8 textures and upload to atlas pages
+	// patch_data is reused across iterations; the converter clears and resizes it for each patch.
+	std::vector<uint8_t> patch_data;
+	for (const patch_t* patch_to_upload : data_->patches_to_upload)
+	{
+		Atlas& atlas = data_->patch_atlases[data_->patch_lookup[patch_to_upload]];
+		AtlasEntry& entry = atlas.entries[patch_to_upload];
+		convert_patch_to_trimmed_rg8_pixels(patch_to_upload, patch_data);
+		// NOTE(review): for odd widths patch_data rows carry one extra padding pixel while the region below
+		// uses entry.w -- this relies on update_texture honoring 4-byte row alignment; confirm in the RHI.
+		rhi.update_texture(
+			ctx,
+			atlas.tex,
+			{static_cast<int32_t>(entry.x), static_cast<int32_t>(entry.y), entry.w, entry.h},
+			PixelFormat::kRG8,
+			tcb::as_bytes(tcb::span(patch_data))
+		);
+	}
+	data_->patches_to_upload.clear();
+	// Update the buffers for each list
+	// cmd_lists_ was filled in the same order as the context's lists, so both are walked in lockstep.
+	auto ctx_list_itr = ctx_->begin();
+	for (size_t i = 0; i < cmd_lists_.size() && ctx_list_itr != ctx_->end(); i++)
+	{
+		auto& merged_list = cmd_lists_[i];
+		auto& orig_list = *ctx_list_itr;
+		tcb::span<const std::byte> vertex_data = tcb::as_bytes(tcb::span(orig_list.vertices));
+		tcb::span<const std::byte> index_data = tcb::as_bytes(tcb::span(orig_list.indices));
+		rhi.update_buffer_contents(ctx, merged_list.vbo, 0, vertex_data);
+		rhi.update_buffer_contents(ctx, merged_list.ibo, 0, index_data);
+		// Update the binding sets for each individual merged command
+		VertexAttributeBufferBinding vbos[] = {{0, merged_list.vbo}};
+		for (auto& mcmd : merged_list.cmds)
+		{
+			// tx[0] = the command's texture, tx[1] = palette, tx[2] = colormap.
+			TextureBinding tx[3];
+			auto tex_visitor = srb2::Overload {
+				[&](size_t atlas_index)
+				{
+					Atlas& atlas = data_->patch_atlases[atlas_index];
+					tx[0] = {SamplerName::kSampler0, atlas.tex};
+					tx[1] = {SamplerName::kSampler1, data_->palette_tex};
+				},
+				[&](const MergedTwodeeCommandFlatTexture& tex)
+				{
+					Handle<Texture> th = flat_manager_->find_indexed(tex.lump);
+					SRB2_ASSERT(th != kNullHandle);
+					tx[0] = {SamplerName::kSampler0, th};
+					tx[1] = {SamplerName::kSampler1, data_->palette_tex};
+				}};
+			if (mcmd.texture)
+			{
+				std::visit(tex_visitor, *mcmd.texture);
+			}
+			else
+			{
+				// Untextured commands sample the default texture.
+				tx[0] = {SamplerName::kSampler0, data_->default_tex};
+				tx[1] = {SamplerName::kSampler1, data_->palette_tex};
+			}
+			const uint8_t* colormap = mcmd.colormap;
+			Handle<Texture> colormap_h = data_->default_colormap_tex;
+			if (colormap)
+			{
+				SRB2_ASSERT(data_->colormaps.find(colormap) != data_->colormaps.end());
+				colormap_h = data_->colormaps[colormap];
+			}
+			tx[2] = {SamplerName::kSampler2, colormap_h};
+			mcmd.binding_set =
+				rhi.create_binding_set(ctx, data_->pipelines[mcmd.pipeline_key], {tcb::span(vbos), tcb::span(tx)});
+		}
+		ctx_list_itr++;
+	}
+	// Uniform sets
+	// NOTE(review): assuming column-major storage, this maps x in [0, vid.width] to [-1, 1] and
+	// y in [0, vid.height] to [1, -1] (y pointing down) -- confirm against the shader's convention.
+	std::array<UniformVariant, 1> g1_uniforms = {{
+		// Projection
+		std::array<std::array<float, 4>, 4> {
+			{{2.f / vid.width, 0.f, 0.f, 0.f},
+			 {0.f, -2.f / vid.height, 0.f, 0.f},
+			 {0.f, 0.f, 1.f, 0.f},
+			 {-1.f, 1.f, 0.f, 1.f}}},
+	}};
+	std::array<UniformVariant, 3> g2_uniforms = {
+		{// ModelView
+		 std::array<std::array<float, 4>, 4> {
+			 {{1.f, 0.f, 0.f, 0.f}, {0.f, 1.f, 0.f, 0.f}, {0.f, 0.f, 1.f, 0.f}, {0.f, 0.f, 0.f, 1.f}}},
+		 // Texcoord0 Transform
+		 std::array<std::array<float, 3>, 3> {{{1.f, 0.f, 0.f}, {0.f, 1.f, 0.f}, {0.f, 0.f, 1.f}}},
+		 // Sampler 0 Is Indexed Alpha (yes, it always is)
+		 static_cast<int32_t>(1)}};
+	us_1 = rhi.create_uniform_set(ctx, {tcb::span(g1_uniforms)});
+	us_2 = rhi.create_uniform_set(ctx, {tcb::span(g2_uniforms)});
+// Clear color passed when beginning the render pass on output_.
+// NOTE(review): render_pass_ is created with AttachmentLoadOp::kLoad, so this value is presumably unused.
+static constexpr const rhi::Color kClearColor = {0, 0, 0, 1};
+/// @brief Issue the draw calls for every merged command built in prepass.
+///
+/// Renders into output_ when one is set, otherwise into the default render pass.
+/// Does nothing when no context or shared data is attached.
+void TwodeePass::graphics(Rhi& rhi, Handle<GraphicsContext> ctx)
+	if (!ctx_ || !data_)
+	{
+		return;
+	}
+	if (output_)
+	{
+		rhi.begin_render_pass(ctx, {render_pass_, output_, std::nullopt, kClearColor});
+	}
+	else
+	{
+		rhi.begin_default_render_pass(ctx, false);
+	}
+	for (auto& list : cmd_lists_)
+	{
+		for (auto& cmd : list.cmds)
+		{
+			if (cmd.elements == 0)
+			{
+				// Don't do anything for 0-element commands
+				// This shouldn't happen, but, just in case...
+				continue;
+			}
+			SRB2_ASSERT(data_->pipelines.find(cmd.pipeline_key) != data_->pipelines.end());
+			Handle<Pipeline> pl = data_->pipelines[cmd.pipeline_key];
+			rhi.bind_pipeline(ctx, pl);
+			// The viewport is only set explicitly when rendering to an offscreen output.
+			if (output_)
+			{
+				rhi.set_viewport(ctx, {0, 0, output_width_, output_height_});
+			}
+			rhi.bind_uniform_set(ctx, 0, us_1);
+			rhi.bind_uniform_set(ctx, 1, us_2);
+			rhi.bind_binding_set(ctx, cmd.binding_set);
+			rhi.bind_index_buffer(ctx, list.ibo);
+			rhi.draw_indexed(ctx, cmd.elements, cmd.index_offset);
+		}
+	}
+	rhi.end_render_pass(ctx);
+/// @brief Discard this frame's merged command lists so the next prepass starts from an empty set.
+/// Does nothing when no context or shared data is attached.
+void TwodeePass::postpass(Rhi& rhi)
+	if (!ctx_ || !data_)
+	{
+		return;
+	}
+	cmd_lists_.clear();
diff --git a/src/hwr2/pass_twodee.hpp b/src/hwr2/pass_twodee.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..da9c2b563215c3bcaa334a10f78e01476df3f8ad
--- /dev/null
+++ b/src/hwr2/pass_twodee.hpp
@@ -0,0 +1,116 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#ifndef __SRB2_HWR2_PASS_TWODEE_HPP__
+#define __SRB2_HWR2_PASS_TWODEE_HPP__
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <unordered_map>
+#include <variant>
+#include <vector>
+#include "../cxxutil.hpp"
+#include "pass.hpp"
+#include "pass_resource_managers.hpp"
+#include "twodee.hpp"
+namespace srb2::hwr2
+class TwodeePass;
+/// @brief Shared structures to allow multiple 2D instances to share the same atlases
+struct TwodeePassData;
+/// @brief Hash map key for caching pipelines
+struct TwodeePipelineKey
+	Draw2dBlend blend; // Blend-mode component of the key
+	bool lines; // Line-drawing component of the key
+	bool operator==(const TwodeePipelineKey& other) const noexcept { return blend == other.blend && lines == other.lines; }
+	bool operator!=(const TwodeePipelineKey& other) const noexcept { return blend != other.blend || lines != other.lines; }
+struct MergedTwodeeCommandFlatTexture
+	lumpnum_t lump; // Lump number identifying the flat; the only field compared for equality
+	bool operator==(const MergedTwodeeCommandFlatTexture& other) const noexcept { return other.lump == lump; }
+	bool operator!=(const MergedTwodeeCommandFlatTexture& other) const noexcept { return !(other.lump == lump); }
+struct MergedTwodeeCommand // One draw call issued by TwodeePass::graphics().
+	TwodeePipelineKey pipeline_key = {}; // Selects the cached pipeline bound for this draw
+	rhi::Handle<rhi::BindingSet> binding_set = {}; // Binding set bound just before the draw
+	std::optional<std::variant<size_t, MergedTwodeeCommandFlatTexture>> texture; // Empty, a size_t, or a flat texture reference
+	const uint8_t* colormap = nullptr; // Initialized like every other member so a default-constructed command never holds an indeterminate pointer
+	uint32_t index_offset = 0; // First index passed to draw_indexed
+	uint32_t elements = 0; // Index count passed to draw_indexed; 0-element commands are skipped
+struct MergedTwodeeCommandList // A group of merged commands that share one vertex/index buffer pair.
+	rhi::Handle<rhi::Buffer> vbo {}; // presumably the vertex buffer for this list - not referenced in graphics(), TODO confirm
+	uint32_t vbo_size = 0;
+	rhi::Handle<rhi::Buffer> ibo {}; // Index buffer bound by graphics() before each draw of this list
+	uint32_t ibo_size = 0;
+	std::vector<MergedTwodeeCommand> cmds; // Draws issued in order by graphics()
+std::shared_ptr<TwodeePassData> make_twodee_pass_data();
+struct TwodeePass final : public Pass // Render pass that draws the buffered 2D commands of a Twodee context.
+	Twodee* ctx_ = nullptr; // Source 2D context; graphics() and postpass() return early while this is null
+	std::variant<rhi::Handle<rhi::Texture>, rhi::Handle<rhi::Renderbuffer>> out_color_;
+	std::shared_ptr<TwodeePassData> data_; // Shared pass data; holds the pipeline cache read by graphics()
+	std::shared_ptr<FlatTextureManager> flat_manager_;
+	rhi::Handle<rhi::UniformSet> us_1; // Bound to uniform set slot 0 in graphics()
+	rhi::Handle<rhi::UniformSet> us_2; // Bound to uniform set slot 1 in graphics()
+	std::vector<MergedTwodeeCommandList> cmd_lists_; // Drawn by graphics(), cleared by postpass()
+	std::vector<std::tuple<rhi::Handle<rhi::Buffer>, std::size_t>> vbos_;
+	std::vector<std::tuple<rhi::Handle<rhi::Buffer>, std::size_t>> ibos_;
+	bool rebuild_atlases_ = false;
+	rhi::Handle<rhi::RenderPass> render_pass_; // Render pass used when output_ is set
+	rhi::Handle<rhi::Texture> output_; // Offscreen target; when unset graphics() uses the default render pass
+	uint32_t output_width_ = 0; // Viewport width applied when output_ is set
+	uint32_t output_height_ = 0; // Viewport height applied when output_ is set
+	TwodeePass();
+	virtual ~TwodeePass();
+	virtual void prepass(rhi::Rhi& rhi) override;
+	virtual void transfer(rhi::Rhi& rhi, rhi::Handle<rhi::TransferContext> ctx) override;
+	virtual void graphics(rhi::Rhi& rhi, rhi::Handle<rhi::GraphicsContext> ctx) override;
+	virtual void postpass(rhi::Rhi& rhi) override;
+} // namespace srb2::hwr2
+template <>
+struct std::hash<srb2::hwr2::TwodeePipelineKey> // Lets TwodeePipelineKey be used as a key in unordered containers.
+	std::size_t operator()(const srb2::hwr2::TwodeePipelineKey& v) const
+	{
+		std::size_t hash = 0; // Seed that hash_combine folds each field into
+		srb2::hash_combine(hash, v.blend, v.lines); // Covers the same two fields that operator== compares
+		return hash;
+	}
+#endif // __SRB2_HWR2_PASS_TWODEE_HPP__
diff --git a/src/hwr2/twodee.cpp b/src/hwr2/twodee.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a15234feef3b198098b22001f29815dc8607bbc
--- /dev/null
+++ b/src/hwr2/twodee.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#include "twodee.hpp"
+#include "../w_wad.h"
+using namespace srb2;
+using namespace hwr2;
+Twodee::Twodee() = default; // All five special members are defaulted out-of-line in this translation unit.
+Twodee::Twodee(const Twodee&) = default;
+Twodee::Twodee(Twodee&&) noexcept = default;
+Twodee& Twodee::operator=(const Twodee&) = default;
+// Will the default move prevent the vectors from losing their allocations? I guess it depends on the STL impl.
+// It's probably worth optimizing around.
+Twodee& Twodee::operator=(Twodee&&) noexcept = default;
+void Draw2dQuadBuilder::done() // Appends the built quad (4 vertices, 6 indices, 1 command) to the context's current draw list.
+	if (ctx_.lists_.size() == 0) // Lazily create the first draw list.
+	{
+		ctx_.lists_.push_back({});
+	}
+	if (ctx_.lists_.rbegin()->vertices.size() >= (Draw2dList::kMaxVertices - 4))
+	{
+		// The current draw list has too many vertices to fit this command
+		ctx_.lists_.push_back({});
+	}
+	auto& list = *ctx_.lists_.rbegin();
+	quad_.begin_element = list.vertices.size(); // First vertex of this quad within the list
+	quad_.begin_index = list.indices.size(); // First index of this quad; indices grow by 6 per quad, so this is not the vertex count
+	list.vertices.push_back({quad_.xmin, quad_.ymin, 0.f, 0, 0, quad_.r, quad_.g, quad_.b, quad_.a});
+	list.vertices.push_back({quad_.xmax, quad_.ymin, 0.f, 1, 0, quad_.r, quad_.g, quad_.b, quad_.a});
+	list.vertices.push_back({quad_.xmax, quad_.ymax, 0.f, 1, 1, quad_.r, quad_.g, quad_.b, quad_.a});
+	list.vertices.push_back({quad_.xmin, quad_.ymax, 0.f, 0, 1, quad_.r, quad_.g, quad_.b, quad_.a});
+	list.indices.push_back(quad_.begin_element + 0); // Triangle 1: corners 0, 1, 2
+	list.indices.push_back(quad_.begin_element + 1);
+	list.indices.push_back(quad_.begin_element + 2);
+	list.indices.push_back(quad_.begin_element + 0); // Triangle 2: corners 0, 2, 3
+	list.indices.push_back(quad_.begin_element + 2);
+	list.indices.push_back(quad_.begin_element + 3);
+	list.cmds.push_back(quad_);
+void Draw2dVerticesBuilder::done() // Appends the queued vertices, one index per vertex, and one command to the context's current draw list.
+	if (ctx_.lists_.size() == 0) // Lazily create the first draw list.
+	{
+		ctx_.lists_.push_back({});
+	}
+	if (ctx_.lists_.rbegin()->vertices.size() + verts_.size() > Draw2dList::kMaxVertices) // Size the check by the queued vertex count so 16-bit indices cannot overflow
+	{
+		// The current draw list has too many vertices to fit this command
+		ctx_.lists_.push_back({});
+	}
+	auto& list = *ctx_.lists_.rbegin();
+	tris_.begin_element = list.vertices.size(); // First vertex of this command within the list
+	tris_.begin_index = list.indices.size(); // First index of this command within the list
+	if (verts_.empty()) // No command is recorded when nothing was queued.
+	{
+		return;
+	}
+	std::size_t i = 0;
+	for (auto& vert : verts_)
+	{
+		list.vertices.push_back({vert[0], vert[1], 0, vert[2], vert[3], r_, g_, b_, a_}); // x, y, z = 0, u, v, then the builder's color
+		list.indices.push_back(tris_.begin_element + i);
+		i++;
+	}
+	list.cmds.push_back(tris_);
+Draw2dBlend srb2::hwr2::get_blend_mode(const Draw2dCmd& cmd) noexcept // Returns the blend field of whichever alternative the command holds.
+	const auto pick_blend = srb2::Overload {
+		[](const Draw2dPatchQuad& quad) { return quad.blend; },
+		[](const Draw2dVertices& verts) { return verts.blend; }};
+	return std::visit(pick_blend, cmd);
+bool srb2::hwr2::is_draw_lines(const Draw2dCmd& cmd) noexcept // Patch quads are never lines; vertex commands report their own flag.
+	const auto pick_lines = srb2::Overload {
+		[](const Draw2dPatchQuad&) { return false; },
+		[](const Draw2dVertices& verts) { return verts.lines; }};
+	return std::visit(pick_lines, cmd);
+std::size_t srb2::hwr2::elements(const Draw2dCmd& cmd) noexcept // 6 for a patch quad; the stored element count for a vertex command.
+	const auto count_elements = srb2::Overload {
+		[](const Draw2dPatchQuad&) -> std::size_t { return 6; },
+		[](const Draw2dVertices& verts) -> std::size_t { return verts.elements; }};
+	return std::visit(count_elements, cmd);
diff --git a/src/hwr2/twodee.hpp b/src/hwr2/twodee.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..10cc8e6b1394e4faef93c7a9b6ed0a9c1fb86890
--- /dev/null
+++ b/src/hwr2/twodee.hpp
@@ -0,0 +1,280 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+#ifndef __SRB2_HWR2_TWODEE_HPP__
+#define __SRB2_HWR2_TWODEE_HPP__
+#include <array>
+#include <cstdint>
+#include <optional>
+#include <utility>
+#include <variant>
+#include <vector>
+#include <tcb/span.hpp>
+#include "../cxxutil.hpp"
+#include "../doomtype.h"
+namespace srb2::hwr2
+struct TwodeeVertex // Vertex layout written by the 2D builders: position, texture coordinate, color.
+	float x; // Position
+	float y;
+	float z; // The builders in twodee.cpp always write 0 here
+	float u; // Texture coordinate
+	float v;
+	float r; // Color
+	float g;
+	float b;
+	float a;
+enum class Draw2dBlend // Blend mode carried by every 2D draw command.
+	kModulate, // First enumerator, so it is the value-initialized default
+	kAdditive,
+	kSubtractive,
+	kReverseSubtractive,
+	kInvertDest
+struct Draw2dPatchQuad // One quad command recorded by Draw2dQuadBuilder.
+	std::size_t begin_index = 0; // Start offset recorded by the builder's done()
+	std::size_t begin_element = 0; // First vertex of the quad within its draw list
+	// A null patch ptr means no patch is drawn
+	const patch_t* patch = nullptr;
+	const uint8_t* colormap = nullptr;
+	Draw2dBlend blend = Draw2dBlend::kModulate; // Defaulted like Draw2dVertices::blend so it is never left indeterminate
+	float r = 0.f; // Vertex color applied to all four corners
+	float g = 0.f;
+	float b = 0.f;
+	float a = 0.f;
+	// Size fields are made available to let the consumer modify the vertex data for optimization
+	float xmin = 0.f;
+	float ymin = 0.f;
+	float xmax = 0.f;
+	float ymax = 0.f;
+	float clip_xmin = 0.f; // Clip rectangle; only meaningful when clip is true
+	float clip_xmax = 0.f;
+	float clip_ymin = 0.f;
+	float clip_ymax = 0.f;
+	bool clip = false;
+	bool flip = false;
+	bool vflip = false;
+struct Draw2dVertices // One free-form vertex command recorded by Draw2dVerticesBuilder.
+	std::size_t begin_index = 0; // First index of the command within its draw list
+	std::size_t begin_element = 0; // First vertex of the command within its draw list
+	std::size_t elements = 0; // Incremented once per vert() call on the builder
+	Draw2dBlend blend = Draw2dBlend::kModulate;
+	lumpnum_t flat_lump = UINT32_MAX; // LUMPERROR but not loading w_wad.h from this header
+	bool lines = false; // Reported by is_draw_lines()
+using Draw2dCmd = std::variant<Draw2dPatchQuad, Draw2dVertices>; // A recorded 2D command is either a patch quad or a vertex batch.
+Draw2dBlend get_blend_mode(const Draw2dCmd& cmd) noexcept; // Blend field of whichever alternative is held
+bool is_draw_lines(const Draw2dCmd& cmd) noexcept; // False for patch quads; the lines flag for vertex commands
+std::size_t elements(const Draw2dCmd& cmd) noexcept; // 6 for a patch quad; the stored element count for vertex commands
+struct Draw2dList // One batch of vertices, indices and the commands that reference them.
+	std::vector<TwodeeVertex> vertices;
+	std::vector<uint16_t> indices; // 16-bit indices into vertices
+	std::vector<Draw2dCmd> cmds;
+	static constexpr const std::size_t kMaxVertices = 65536; // The builders start a new list rather than exceed this; matches the range of a uint16_t index
+class Draw2dQuadBuilder;
+class Draw2dVerticesBuilder;
+/// @brief Buffered 2D drawing context
+class Twodee
+	std::vector<Draw2dList> lists_; // Draw lists appended to by the builders' done()
+	std::vector<TwodeeVertex> current_verts_;
+	std::vector<uint16_t> current_indices_;
+	friend class Draw2dQuadBuilder; // The builders write directly into lists_
+	friend class Draw2dVerticesBuilder;
+	Twodee();
+	Twodee(const Twodee&);
+	Twodee(Twodee&&) noexcept;
+	Twodee& operator=(const Twodee&);
+	Twodee& operator=(Twodee&&) noexcept;
+	Draw2dQuadBuilder begin_quad() noexcept; // Starts a quad builder bound to this context
+	Draw2dVerticesBuilder begin_verts() noexcept; // Starts a vertex builder bound to this context
+	typename std::vector<Draw2dList>::iterator begin() noexcept { return lists_.begin(); } // Iteration is over the draw lists
+	typename std::vector<Draw2dList>::iterator end() noexcept { return lists_.end(); }
+	typename std::vector<Draw2dList>::const_iterator begin() const noexcept { return lists_.cbegin(); }
+	typename std::vector<Draw2dList>::const_iterator end() const noexcept { return lists_.cend(); }
+	typename std::vector<Draw2dList>::const_iterator cbegin() const noexcept { return lists_.cbegin(); }
+	typename std::vector<Draw2dList>::const_iterator cend() const noexcept { return lists_.cend(); }
+class Draw2dQuadBuilder // Chainable builder for one Draw2dPatchQuad; done() appends it to the owning Twodee.
+	Draw2dPatchQuad quad_; // Command being assembled
+	Twodee& ctx_; // Context that receives the quad in done()
+	Draw2dQuadBuilder(Twodee& ctx) : quad_ {}, ctx_ {ctx} {}
+	friend class Twodee; // Twodee::begin_quad() constructs the builder
+	Draw2dQuadBuilder(const Draw2dQuadBuilder&) = delete;
+	Draw2dQuadBuilder(Draw2dQuadBuilder&&) = default;
+	Draw2dQuadBuilder& operator=(const Draw2dQuadBuilder&) = delete;
+	Draw2dQuadBuilder& operator=(Draw2dQuadBuilder&&) = default;
+	Draw2dQuadBuilder& rect(float x, float y, float w, float h) // Sets the quad bounds from an origin (x, y) and a size (w, h).
+	{
+		quad_.xmin = x;
+		quad_.xmax = x + w;
+		quad_.ymin = y;
+		quad_.ymax = y + h;
+		return *this;
+	}
+	Draw2dQuadBuilder& flip(bool flip) // Stores the flip flag.
+	{
+		quad_.flip = flip;
+		return *this;
+	}
+	Draw2dQuadBuilder& vflip(bool vflip) // Stores the vflip flag.
+	{
+		quad_.vflip = vflip;
+		return *this;
+	}
+	Draw2dQuadBuilder& clip(float xmin, float ymin, float xmax, float ymax) // Stores a clip rectangle and turns clipping on for this quad.
+	{
+		quad_.clip_xmin = xmin;
+		quad_.clip_ymin = ymin;
+		quad_.clip_xmax = xmax;
+		quad_.clip_ymax = ymax;
+		quad_.clip = true;
+		return *this;
+	}
+	Draw2dQuadBuilder& color(float r, float g, float b, float a) // Sets the color written to all four vertices by done().
+	{
+		quad_.r = r;
+		quad_.g = g;
+		quad_.b = b;
+		quad_.a = a;
+		return *this;
+	}
+	Draw2dQuadBuilder& patch(const patch_t* patch) // Sets the patch to draw; null means no patch is drawn.
+	{
+		quad_.patch = patch;
+		return *this;
+	}
+	Draw2dQuadBuilder& blend(Draw2dBlend blend) // Sets the blend mode.
+	{
+		quad_.blend = blend;
+		return *this;
+	}
+	Draw2dQuadBuilder& colormap(const uint8_t* colormap) // Stores the colormap pointer; the pointer is kept, not copied.
+	{
+		quad_.colormap = colormap;
+		return *this;
+	}
+	void done(); // Commits the quad to the context's current draw list (defined in twodee.cpp)
+class Draw2dVerticesBuilder // Chainable builder for one Draw2dVertices command; done() appends it to the owning Twodee.
+	Draw2dVertices tris_; // Command being assembled
+	Twodee& ctx_; // Context that receives the command in done()
+	std::vector<std::array<float, 4>> verts_; // Queued vertices as {x, y, u, v}
+	float r_ = 1.f; // Color applied to every queued vertex in done(); defaults to all ones
+	float g_ = 1.f;
+	float b_ = 1.f;
+	float a_ = 1.f;
+	Draw2dVerticesBuilder(Twodee& ctx) : tris_ {}, ctx_ {ctx} {}
+	friend class Twodee; // Twodee::begin_verts() constructs the builder
+	Draw2dVerticesBuilder(const Draw2dVerticesBuilder&) = delete;
+	Draw2dVerticesBuilder(Draw2dVerticesBuilder&&) = default;
+	Draw2dVerticesBuilder& operator=(const Draw2dVerticesBuilder&) = delete;
+	Draw2dVerticesBuilder& operator=(Draw2dVerticesBuilder&&) = default;
+	Draw2dVerticesBuilder& vert(float x, float y, float u = 0, float v = 0) // Queues one vertex and bumps the command's element count.
+	{
+		verts_.push_back({x, y, u, v});
+		tris_.elements += 1;
+		return *this;
+	}
+	Draw2dVerticesBuilder& color(float r, float g, float b, float a) // Sets the single color shared by all vertices of this command.
+	{
+		r_ = r;
+		g_ = g;
+		b_ = b;
+		a_ = a;
+		return *this;
+	}
+	Draw2dVerticesBuilder& blend(Draw2dBlend blend) // Sets the blend mode.
+	{
+		tris_.blend = blend;
+		return *this;
+	}
+	Draw2dVerticesBuilder& lines(bool lines) // Sets the lines flag reported by is_draw_lines().
+	{
+		tris_.lines = lines;
+		return *this;
+	}
+	Draw2dVerticesBuilder& flat(lumpnum_t lump) // Sets the flat lump; the default is UINT32_MAX.
+	{
+		tris_.flat_lump = lump;
+		return *this;
+	}
+	void done(); // Commits the queued vertices to the context's current draw list (defined in twodee.cpp)
+inline Draw2dQuadBuilder Twodee::begin_quad() noexcept // Defined after the builder classes because it needs their complete types.
+	return Draw2dQuadBuilder(*this);
+inline Draw2dVerticesBuilder Twodee::begin_verts() noexcept // Same pattern for the vertex builder.
+	return Draw2dVerticesBuilder(*this);
+} // namespace srb2::hwr2
+#endif // __SRB2_HWR2_TWODEE_HPP__
diff --git a/src/i_video_common.cpp b/src/i_video_common.cpp
index 14f5e31ae77fae450bc4dda7fa6cbe01c484df02..040c477c0d9a7de35a8a266276ae90738006593d 100644
--- a/src/i_video_common.cpp
+++ b/src/i_video_common.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "i_video.h"
 #include <algorithm>
@@ -7,24 +16,30 @@
 #include <imgui.h>
 #include "cxxutil.hpp"
+#include "f_finale.h"
+#include "hwr2/pass_blit_rect.hpp"
 #include "hwr2/pass_imgui.hpp"
+#include "hwr2/pass_manager.hpp"
+#include "hwr2/pass_postprocess.hpp"
+#include "hwr2/pass_resource_managers.hpp"
 #include "hwr2/pass_software.hpp"
+#include "hwr2/pass_twodee.hpp"
+#include "hwr2/twodee.hpp"
 #include "v_video.h"
-#include "sdl/ogl_sdl.h"
-#include "st_stuff.h" // kill
 #include "d_netcmd.h" // kill
+#include "discord.h"  // kill
 #include "doomstat.h" // kill
-#include "s_sound.h" // kill
-#include "discord.h" // kill
+#include "s_sound.h"  // kill
+#include "sdl/ogl_sdl.h"
+#include "st_stuff.h" // kill
 using namespace srb2;
 using namespace srb2::hwr2;
 using namespace srb2::rhi;
-static SoftwareBlitPass g_sw_pass;
-static ImguiPass g_imgui_pass;
+static std::shared_ptr<PassManager> g_passmanager;
 Handle<Rhi> srb2::sys::g_current_rhi = kNullHandle;
@@ -48,8 +63,7 @@ static void finish_legacy_ogl_update()
 		if (cv_ticrate.value)
-		if (cv_showping.value && netgame &&
-				( consoleplayer != serverplayer || ! server_lagless ))
+		if (cv_showping.value && netgame && (consoleplayer != serverplayer || !server_lagless))
 			if (server_lagless)
@@ -58,11 +72,8 @@ static void finish_legacy_ogl_update()
-				for (
-						player = 1;
-						player < MAXPLAYERS;
-						player++
-				){
+				for (player = 1; player < MAXPLAYERS; player++)
+				{
 					if (D_IsPlayerHumanAndGaming(player))
@@ -91,6 +102,154 @@ static void finish_legacy_ogl_update()
+static std::shared_ptr<PassManager> build_pass_manager() // Builds the frame's pass list; passes are inserted in the order written here.
+	std::shared_ptr<PassManager> manager = std::make_shared<PassManager>();
+	std::shared_ptr<FramebufferManager> framebuffer_manager = std::make_shared<FramebufferManager>();
+	std::shared_ptr<MainPaletteManager> palette_manager = std::make_shared<MainPaletteManager>();
+	std::shared_ptr<FlatTextureManager> flat_texture_manager = std::make_shared<FlatTextureManager>();
+	std::shared_ptr<SoftwarePass> software_pass = std::make_shared<SoftwarePass>();
+	std::shared_ptr<BlitRectPass> blit_sw_pass = std::make_shared<BlitRectPass>(palette_manager, true);
+	std::shared_ptr<TwodeePass> twodee = std::make_shared<TwodeePass>();
+	twodee->flat_manager_ = flat_texture_manager;
+	twodee->data_ = make_twodee_pass_data();
+	twodee->ctx_ = &g_2d; // The 2D pass draws from the global 2D context
+	std::shared_ptr<BlitRectPass> pp_simple_blit_pass = std::make_shared<BlitRectPass>(false);
+	std::shared_ptr<PostprocessWipePass> pp_wipe_pass = std::make_shared<PostprocessWipePass>();
+	std::shared_ptr<ImguiPass> imgui_pass = std::make_shared<ImguiPass>();
+	std::shared_ptr<BlitRectPass> final_composite_pass = std::make_shared<BlitRectPass>(true);
+	manager->insert("framebuffer_manager", framebuffer_manager);
+	manager->insert("palette_manager", palette_manager);
+	manager->insert("flat_texture_manager", flat_texture_manager);
+	manager->insert(
+		"3d_prepare",
+		[framebuffer_manager](PassManager& mgr, Rhi&) // First callback: enable the software passes only in software mode and when the wipe is not skipping rendering
+		{
+			const bool sw_enabled = rendermode == render_soft;
+			mgr.set_pass_enabled("software", !g_wipeskiprender && sw_enabled);
+			mgr.set_pass_enabled("blit_sw_prepare", !g_wipeskiprender && sw_enabled);
+			mgr.set_pass_enabled("blit_sw", !g_wipeskiprender && sw_enabled);
+		},
+		[framebuffer_manager](PassManager&, Rhi&) // Second callback: the main buffers are swapped only outside of wipes
+		{
+			if (!WipeInAction)
+			{
+				framebuffer_manager->swap_main();
+			}
+		}
+	);
+	manager->insert("software", software_pass);
+	manager->insert(
+		"blit_sw_prepare",
+		[blit_sw_pass, software_pass, framebuffer_manager](PassManager&, Rhi&) // Route the software screen texture into the current main color buffer
+		{
+			blit_sw_pass->set_texture(software_pass->screen_texture(), vid.width, vid.height);
+			blit_sw_pass->set_output(framebuffer_manager->current_main_color(), vid.width, vid.height, false, false);
+		}
+	);
+	manager->insert("blit_sw", blit_sw_pass);
+	manager->insert(
+		"2d_prepare",
+		[twodee, framebuffer_manager](PassManager& mgr, Rhi&) // Point the 2D pass at the current main color buffer
+		{
+			twodee->output_ = framebuffer_manager->current_main_color();
+			twodee->output_width_ = vid.width;
+			twodee->output_height_ = vid.height;
+		}
+	);
+	manager->insert("2d", twodee);
+	manager->insert(
+		"pp_final_prepare",
+		[](PassManager& mgr, Rhi&) // The wipe passes run only while a wipe is in action
+		{
+			mgr.set_pass_enabled("pp_final_wipe_prepare", WipeInAction);
+			mgr.set_pass_enabled("pp_final_wipe", WipeInAction);
+			mgr.set_pass_enabled("pp_final_wipe_flip", WipeInAction);
+		}
+	);
+	manager->insert(
+		"pp_final_simple_blit_prepare",
+		[pp_simple_blit_pass, framebuffer_manager](PassManager&, Rhi&)
+		{
+			Handle<Texture> color = framebuffer_manager->current_main_color();
+			if (WipeInAction && !g_wipereverse)
+			{
+				// Non-reverse wipes are "fade-outs" from the previous frame.
+				color = framebuffer_manager->previous_main_color();
+			}
+			pp_simple_blit_pass->set_texture(color, vid.width, vid.height);
+			pp_simple_blit_pass
+				->set_output(framebuffer_manager->current_post_color(), vid.width, vid.height, false, true);
+		}
+	);
+	manager->insert("pp_final_simple_blit", pp_simple_blit_pass);
+	manager->insert(
+		"pp_final_simple_blit_flip",
+		[framebuffer_manager](PassManager&, Rhi&) { framebuffer_manager->swap_post(); }
+	);
+	manager->insert(
+		"pp_final_wipe_prepare",
+		[pp_wipe_pass, framebuffer_manager](PassManager&, Rhi&) // Wipe from the previous post buffer towards the current main buffer, writing into the current post buffer
+		{
+			pp_wipe_pass->set_source(framebuffer_manager->previous_post_color(), vid.width, vid.height);
+			pp_wipe_pass->set_end(framebuffer_manager->current_main_color());
+			pp_wipe_pass->set_target(framebuffer_manager->current_post_color(), vid.width, vid.height);
+		}
+	);
+	manager->insert("pp_final_wipe", pp_wipe_pass);
+	manager->insert(
+		"pp_final_wipe_flip",
+		[framebuffer_manager](PassManager&, Rhi&) { framebuffer_manager->swap_post(); }
+	);
+	manager->insert(
+		"final_composite_prepare",
+		[final_composite_pass, framebuffer_manager](PassManager&, Rhi&) // Composite the previous post buffer to a null output handle at the real window size
+		{
+			final_composite_pass->set_texture(framebuffer_manager->previous_post_color(), vid.width, vid.height);
+			final_composite_pass->set_output(kNullHandle, vid.realwidth, vid.realheight, true, true);
+		}
+	);
+	manager->insert("final_composite", final_composite_pass);
+	manager->insert("imgui", imgui_pass);
+	manager->insert(
+		"present",
+		[](PassManager&, Rhi& rhi) {}, // First callback intentionally does nothing
+		[framebuffer_manager](PassManager&, Rhi& rhi)
+		{
+			rhi.present();
+			rhi.finish();
+			framebuffer_manager->reset_post();
+			// TODO fix this: it's an ugly hack to work around issues with wipes
+			// Why this works:
+			// - Menus run F_RunWipe which is an inner update loop calling I_FinishUpdate, with this global set
+			// - After exiting F_RunWipe, g_2d should normally be cleared by I_FinishUpdate
+			// - Unfortunately, the menu has already run all its draw calls when exiting F_RunWipe
+			// - That causes a single-frame flash of no 2d content, which is an epilepsy risk.
+			// - By not clearing the 2d context, we are redrawing 2d every frame of the wipe
+			// - This "works" because we draw 2d to the normal color buffer, not the postprocessed screen.
+			// - It does result in the FPS counter being mangled during the wipe though.
+			// - To fix the issues around wipes, wipes need to be a "sub" game state, and eliminate the inner tic loops.
+			if (!WipeInAction)
+			{
+				g_2d = Twodee();
+			}
+		}
+	);
+	return manager;
 void I_FinishUpdate(void)
 	if (rendermode == render_none)
@@ -112,11 +271,9 @@ void I_FinishUpdate(void)
 	io.DisplaySize.y = vid.realheight;
-	if (rhi_changed())
+	if (rhi_changed() || !g_passmanager)
-		// reinitialize passes
-		g_sw_pass = SoftwareBlitPass();
-		g_imgui_pass = ImguiPass();
+		g_passmanager = build_pass_manager();
 	rhi::Rhi* rhi = sys::get_rhi(sys::g_current_rhi);
@@ -127,48 +284,5 @@ void I_FinishUpdate(void)
-	// Prepare phase
-	if (rendermode == render_soft)
-	{
-		g_sw_pass.prepass(*rhi);
-	}
-	g_imgui_pass.prepass(*rhi);
-	// Transfer phase
-	Handle<TransferContext> tc;
-	tc = rhi->begin_transfer();
-	if (rendermode == render_soft)
-	{
-		g_sw_pass.transfer(*rhi, tc);
-	}
-	g_imgui_pass.transfer(*rhi, tc);
-	rhi->end_transfer(tc);
-	// Graphics phase
-	Handle<GraphicsContext> gc;
-	gc = rhi->begin_graphics();
-	// Standard drawing passes...
-	if (rendermode == render_soft)
-	{
-		g_sw_pass.graphics(*rhi, gc);
-	}
-	g_imgui_pass.graphics(*rhi, gc);
-	rhi->end_graphics(gc);
-	// Postpass phase
-	if (rendermode == render_soft)
-	{
-		g_sw_pass.postpass(*rhi);
-	}
-	g_imgui_pass.postpass(*rhi);
-	// Present
-	rhi->present();
-	rhi->finish();
+	g_passmanager->render(*rhi);
diff --git a/src/k_menudraw.c b/src/k_menudraw.c
index b71179ede039619b5d4c3412d2bb0a01c00b9ebc..a7842372140a882b78634b4ccd7bdc31bc99fab8 100644
--- a/src/k_menudraw.c
+++ b/src/k_menudraw.c
@@ -517,10 +517,7 @@ void M_Drawer(void)
 		else if (!WipeInAction && currentMenu != &PAUSE_PlaybackMenuDef)
-			if (rendermode == render_opengl)	// OGL can't handle what SW is doing so let's fake it;
-				V_DrawFadeScreen(122, 3);	// palette index aproximation...
-			else	// Software can keep its unique fade
-				V_DrawCustomFadeScreen("FADEMAP0", 4); // now that's more readable with a faded background (yeah like Quake...)
+			V_DrawFadeScreen(122, 3);
 		if (currentMenu->drawroutine)
@@ -4802,7 +4799,7 @@ static void M_DrawChallengePreview(INT32 x, INT32 y)
 	unlockable_t *ref = NULL;
 	UINT8 *colormap = NULL;
 	UINT16 specialmap = NEXTMAP_INVALID;
 	if (challengesmenu.currentunlock >= MAXUNLOCKABLES)
diff --git a/src/r_patch.cpp b/src/r_patch.cpp
index a27e8035c86344f0c112bea23845b9a6427b7c12..443e940a301465845837b926b3813943ae742c79 100644
--- a/src/r_patch.cpp
+++ b/src/r_patch.cpp
@@ -103,6 +103,7 @@ void Patch_Free(patch_t *patch)
 	if (!patch || patch == missingpat)
diff --git a/src/rhi/gl3_core/gl3_core_rhi.cpp b/src/rhi/gl3_core/gl3_core_rhi.cpp
index 7b9500594421f473de0f6d6056865e5516f5c83e..f98d25d0b3965b0872110f7b3f287151c7f49523 100644
--- a/src/rhi/gl3_core/gl3_core_rhi.cpp
+++ b/src/rhi/gl3_core/gl3_core_rhi.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "gl3_core_rhi.hpp"
 #include <memory>
@@ -13,7 +22,7 @@
 using namespace srb2;
 using namespace rhi;
-#if 1
+#ifndef NDEBUG
 #define GL_ASSERT                                                                                                      \
 	{                                                                                                                  \
 		GLenum __err = gl_->GetError();                                                                                \
@@ -56,6 +65,11 @@ constexpr std::tuple<GLenum, GLenum, GLuint> map_pixel_data_format(rhi::PixelFor
 		size = 1;
+	case rhi::PixelFormat::kRG8:
+		layout = GL_RG;
+		size = 2;
+		break;
 	case rhi::PixelFormat::kRGBA8:
 		layout = GL_RGBA;
@@ -77,6 +91,27 @@ constexpr GLenum map_texture_format(rhi::TextureFormat format)
 		return GL_RGB;
 	case rhi::TextureFormat::kLuminance:
 		return GL_RED;
+	case rhi::TextureFormat::kLuminanceAlpha:
+		return GL_RG;
+	default:
+		return GL_ZERO;
+	}
+constexpr GLenum map_internal_texture_format(rhi::TextureFormat format)
+	switch (format)
+	{
+	case rhi::TextureFormat::kRGBA:
+		return GL_RGBA8;
+	case rhi::TextureFormat::kRGB:
+		return GL_RGB8;
+	case rhi::TextureFormat::kLuminance:
+		return GL_R8;
+	case rhi::TextureFormat::kLuminanceAlpha:
+		return GL_RG8;
+	case rhi::TextureFormat::kDepth:
 		return GL_ZERO;
@@ -286,6 +321,27 @@ constexpr const char* map_uniform_attribute_symbol_name(rhi::UniformName name)
 		return "u_projection";
 	case rhi::UniformName::kTexCoord0Transform:
 		return "u_texcoord0_transform";
+	case rhi::UniformName::kSampler0IsIndexedAlpha:
+		return "u_sampler0_is_indexed_alpha";
+	default:
+		return nullptr;
+	}
+constexpr const char* map_uniform_enable_define(rhi::UniformName name)
+	switch (name)
+	{
+	case rhi::UniformName::kTime:
+		return "ENABLE_U_TIME";
+	case rhi::UniformName::kProjection:
+	case rhi::UniformName::kModelView:
+	case rhi::UniformName::kTexCoord0Transform:
+	case rhi::UniformName::kSampler0IsIndexedAlpha:
 		return nullptr;
@@ -308,6 +364,23 @@ constexpr const char* map_sampler_symbol_name(rhi::SamplerName name)
+constexpr const char* map_sampler_enable_define(rhi::SamplerName name)
+	switch (name)
+	{
+	case rhi::SamplerName::kSampler0:
+		return "ENABLE_S_SAMPLER0";
+	case rhi::SamplerName::kSampler1:
+		return "ENABLE_S_SAMPLER1";
+	case rhi::SamplerName::kSampler2:
+		return "ENABLE_S_SAMPLER2";
+	case rhi::SamplerName::kSampler3:
+		return "ENABLE_S_SAMPLER3";
+	default:
+		return nullptr;
+	}
 constexpr GLenum map_vertex_attribute_format(rhi::VertexAttributeFormat format)
 	switch (format)
@@ -423,8 +496,13 @@ rhi::Handle<rhi::Texture> GlCoreRhi::create_texture(const rhi::TextureDesc& desc
 	SRB2_ASSERT(graphics_context_active_ == false);
-	GLenum internal_format = map_texture_format(desc.format);
+	GLenum internal_format = map_internal_texture_format(desc.format);
 	SRB2_ASSERT(internal_format != GL_ZERO);
+	GLenum format = GL_RGBA;
+	if (desc.format == TextureFormat::kDepth)
+	{
+	}
 	GLuint name = 0;
 	gl_->GenTextures(1, &name);
@@ -439,7 +517,7 @@ rhi::Handle<rhi::Texture> GlCoreRhi::create_texture(const rhi::TextureDesc& desc
-	gl_->TexImage2D(GL_TEXTURE_2D, 0, internal_format, desc.width, desc.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
+	gl_->TexImage2D(GL_TEXTURE_2D, 0, internal_format, desc.width, desc.height, 0, format, GL_UNSIGNED_BYTE, nullptr);
 	GlCoreTexture texture;
@@ -478,13 +556,19 @@ void GlCoreRhi::update_texture(
 	SRB2_ASSERT(texture_slab_.is_valid(texture) == true);
 	auto& t = texture_slab_[texture];
+	// Each row of pixels must be on the unpack alignment boundary.
+	// This alignment is not user changeable until OpenGL 4.
+	constexpr const int32_t kUnpackAlignment = 4;
 	GLenum format = GL_RGBA;
 	GLenum type = GL_UNSIGNED_BYTE;
 	GLuint size = 0;
 	std::tie(format, type, size) = map_pixel_data_format(data_format);
 	SRB2_ASSERT(format != GL_ZERO && type != GL_ZERO);
 	SRB2_ASSERT(map_texture_format(t.desc.format) == format);
-	SRB2_ASSERT(region.w * region.h * size == data.size_bytes());
+	int32_t expected_row_span = (((size * region.w) + kUnpackAlignment - 1) / kUnpackAlignment) * kUnpackAlignment;
+	SRB2_ASSERT(expected_row_span * region.h == data.size_bytes());
 	SRB2_ASSERT(region.x + region.w <= t.desc.width && region.y + region.h <= t.desc.height);
@@ -740,14 +824,79 @@ rhi::Handle<rhi::Pipeline> GlCoreRhi::create_pipeline(const PipelineDesc& desc)
+			for (auto& uniform_group : desc.uniform_input.enabled_uniforms)
+			{
+				for (auto& uniform : uniform_group)
+				{
+					for (auto const& req_uni_group : reqs.uniforms.uniform_groups)
+					{
+						for (auto const& req_uni : req_uni_group)
+						{
+							if (req_uni.name == uniform && !req_uni.required)
+							{
+								vert_src_processed.append("#define ");
+								vert_src_processed.append(map_uniform_enable_define(uniform));
+								vert_src_processed.append("\n");
+							}
+						}
+					}
+				}
+			}
+		}
+		string_i = new_i + 1;
+	} while (string_i != std::string::npos);
+	std::string frag_src_processed;
+	string_i = 0;
+	do
+	{
+		std::string::size_type new_i = frag_src.find('\n', string_i);
+		if (new_i == std::string::npos)
+		{
+			break;
+		}
+		std::string_view line_view(frag_src.c_str() + string_i, new_i - string_i + 1);
+		frag_src_processed.append(line_view);
+		if (line_view.rfind("#version ", 0) == 0)
+		{
+			for (auto& sampler : desc.sampler_input.enabled_samplers)
+			{
+				for (auto const& require_sampler : reqs.samplers.samplers)
+				{
+					if (sampler == require_sampler.name && !require_sampler.required)
+					{
+						frag_src_processed.append("#define ");
+						frag_src_processed.append(map_sampler_enable_define(sampler));
+						frag_src_processed.append("\n");
+					}
+				}
+			}
+			for (auto& uniform_group : desc.uniform_input.enabled_uniforms)
+			{
+				for (auto& uniform : uniform_group)
+				{
+					for (auto const& req_uni_group : reqs.uniforms.uniform_groups)
+					{
+						for (auto const& req_uni : req_uni_group)
+						{
+							if (req_uni.name == uniform && !req_uni.required)
+							{
+								frag_src_processed.append("#define ");
+								frag_src_processed.append(map_uniform_enable_define(uniform));
+								frag_src_processed.append("\n");
+							}
+						}
+					}
+				}
+			}
 		string_i = new_i + 1;
 	} while (string_i != std::string::npos);
 	const char* vert_src_arr[1] = {vert_src_processed.c_str()};
 	const GLint vert_src_arr_lens[1] = {static_cast<GLint>(vert_src_processed.size())};
-	const char* frag_src_arr[1] = {frag_src.c_str()};
-	const GLint frag_src_arr_lens[1] = {static_cast<GLint>(frag_src.size())};
+	const char* frag_src_arr[1] = {frag_src_processed.c_str()};
+	const GLint frag_src_arr_lens[1] = {static_cast<GLint>(frag_src_processed.size())};
 	vertex = gl_->CreateShader(GL_VERTEX_SHADER);
 	gl_->ShaderSource(vertex, 1, vert_src_arr, vert_src_arr_lens);
@@ -1380,6 +1529,8 @@ void GlCoreRhi::bind_index_buffer(Handle<GraphicsContext> ctx, Handle<Buffer> bu
 	SRB2_ASSERT(ib.desc.type == rhi::BufferType::kIndexBuffer);
+	current_index_buffer_ = buffer;
 	gl_->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.buffer);
@@ -1412,11 +1563,20 @@ void GlCoreRhi::draw(Handle<GraphicsContext> ctx, uint32_t vertex_count, uint32_
 void GlCoreRhi::draw_indexed(Handle<GraphicsContext> ctx, uint32_t index_count, uint32_t first_index)
 	SRB2_ASSERT(graphics_context_active_ == true && graphics_context_generation_ == ctx.generation());
+	SRB2_ASSERT(current_index_buffer_ != kNullHandle);
+#ifndef NDEBUG
+	{
+		auto& ib = buffer_slab_[current_index_buffer_];
+		SRB2_ASSERT((index_count + first_index) * 2 + index_buffer_offset_ <= ib.desc.size);
+	}
-		reinterpret_cast<const void*>(first_index * 2 + index_buffer_offset_)
+		(const void*)((size_t)first_index * 2 + index_buffer_offset_)
diff --git a/src/rhi/gl3_core/gl3_core_rhi.hpp b/src/rhi/gl3_core/gl3_core_rhi.hpp
index fa7997b8b21776c8d4d0dadacbdba106933af86b..b5b43afa1ac05b4d51aac447349967eb4da3a068 100644
--- a/src/rhi/gl3_core/gl3_core_rhi.hpp
+++ b/src/rhi/gl3_core/gl3_core_rhi.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_RHI_GLES2_RHI_HPP__
 #define __SRB2_RHI_GLES2_RHI_HPP__
@@ -145,6 +154,8 @@ class GlCoreRhi final : public Rhi
 	Slab<GlCoreUniformSet> uniform_set_slab_;
 	Slab<GlCoreBindingSet> binding_set_slab_;
+	Handle<Buffer> current_index_buffer_;
 	std::unordered_map<GlCoreFramebufferKey, uint32_t> framebuffers_ {16};
 	struct DefaultRenderPassState
diff --git a/src/rhi/gles2/gles2_rhi.cpp b/src/rhi/gles2/gles2_rhi.cpp
index 5c8134eb6637eb9ed764871c0f1c3c25c50360ac..a1eb92acfc2d8e3c1bea2a66254a870ce53d1fac 100644
--- a/src/rhi/gles2/gles2_rhi.cpp
+++ b/src/rhi/gles2/gles2_rhi.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "gles2_rhi.hpp"
 #include <memory>
diff --git a/src/rhi/gles2/gles2_rhi.hpp b/src/rhi/gles2/gles2_rhi.hpp
index 9858e770ba3bdc9bdb67ef4f259ee92d0407395b..f912941b4b315c19af33bc3eb6684fba54693fe9 100644
--- a/src/rhi/gles2/gles2_rhi.hpp
+++ b/src/rhi/gles2/gles2_rhi.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_RHI_GLES2_RHI_HPP__
 #define __SRB2_RHI_GLES2_RHI_HPP__
diff --git a/src/rhi/handle.hpp b/src/rhi/handle.hpp
index bda2928faa1411611acf12e46eb450b58a136a99..282a924dabad57ebac37e4fa35146cdd53ad2d22 100644
--- a/src/rhi/handle.hpp
+++ b/src/rhi/handle.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_RHI_HANDLE_HPP__
 #define __SRB2_RHI_HANDLE_HPP__
diff --git a/src/rhi/rhi.cpp b/src/rhi/rhi.cpp
index c63282b9570f078479e167d2b723dc4a6faa695c..7e166246a384b8407f00b13301b6e144490f29b5 100644
--- a/src/rhi/rhi.cpp
+++ b/src/rhi/rhi.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "rhi.hpp"
 #include <exception>
@@ -14,8 +23,9 @@ const ProgramRequirements srb2::rhi::kProgramRequirementsUnshaded = {
 		 ProgramVertexInput {VertexAttributeName::kTexCoord0, VertexAttributeFormat::kFloat2, false},
 		 ProgramVertexInput {VertexAttributeName::kColor, VertexAttributeFormat::kFloat4, false}}},
 	ProgramUniformRequirements {
-		{{{UniformName::kProjection}}, {{UniformName::kModelView, UniformName::kTexCoord0Transform}}}},
-	ProgramSamplerRequirements {{ProgramSamplerInput {SamplerName::kSampler0, true}}}};
+		{{{{UniformName::kProjection, true}}},
+		 {{{UniformName::kModelView, true}, {UniformName::kTexCoord0Transform, true}}}}},
+	ProgramSamplerRequirements {{{SamplerName::kSampler0, true}}}};
 const ProgramRequirements srb2::rhi::kProgramRequirementsUnshadedPaletted = {
 	ProgramVertexInputRequirements {
@@ -23,9 +33,19 @@ const ProgramRequirements srb2::rhi::kProgramRequirementsUnshadedPaletted = {
 		 ProgramVertexInput {VertexAttributeName::kTexCoord0, VertexAttributeFormat::kFloat2, false},
 		 ProgramVertexInput {VertexAttributeName::kColor, VertexAttributeFormat::kFloat4, false}}},
 	ProgramUniformRequirements {
-		{{{UniformName::kProjection}}, {{UniformName::kModelView, UniformName::kTexCoord0Transform}}}},
+		{{{{UniformName::kProjection, true}}},
+		 {{{UniformName::kModelView, true},
+		   {UniformName::kTexCoord0Transform, true},
+		   {UniformName::kSampler0IsIndexedAlpha, false}}}}},
 	ProgramSamplerRequirements {
-		{ProgramSamplerInput {SamplerName::kSampler0, true}, ProgramSamplerInput {SamplerName::kSampler1, true}}}};
+		{{SamplerName::kSampler0, true}, {SamplerName::kSampler1, true}, {SamplerName::kSampler2, false}}}};
+/// Requirements table that program_requirements_for_program returns for
+/// PipelineProgram::kPostprocessWipe: two vertex inputs (float3 position and
+/// float2 texcoord 0), one uniform group holding the projection and model-view
+/// uniforms, and samplers 0 and 1.
+// NOTE(review): the trailing `true` on each entry is presumably a "required" flag -- confirm
+// against the ProgramVertexInput / ProgramUniformInput / ProgramSamplerInput definitions.
+const ProgramRequirements srb2::rhi::kProgramRequirementsPostprocessWipe = {
+	ProgramVertexInputRequirements {
+		{ProgramVertexInput {VertexAttributeName::kPosition, VertexAttributeFormat::kFloat3, true},
+		 ProgramVertexInput {VertexAttributeName::kTexCoord0, VertexAttributeFormat::kFloat2, true}}},
+	ProgramUniformRequirements {{{{{UniformName::kProjection, true}, {UniformName::kModelView, true}}}}},
+	ProgramSamplerRequirements {{{SamplerName::kSampler0, true}, {SamplerName::kSampler1, true}}}};
 const ProgramRequirements& rhi::program_requirements_for_program(PipelineProgram program) noexcept
@@ -35,6 +55,8 @@ const ProgramRequirements& rhi::program_requirements_for_program(PipelineProgram
 		return kProgramRequirementsUnshaded;
 	case PipelineProgram::kUnshadedPaletted:
 		return kProgramRequirementsUnshadedPaletted;
+	case PipelineProgram::kPostprocessWipe:
+		return kProgramRequirementsPostprocessWipe;
diff --git a/src/rhi/rhi.hpp b/src/rhi/rhi.hpp
index a44423aee482cd17cccc2041681fa6d3fae6a924..43659fe402522d65b55e70a0a688809a13f7fe95 100644
--- a/src/rhi/rhi.hpp
+++ b/src/rhi/rhi.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #ifndef __SRB2_RHI_RHI_HPP__
 #define __SRB2_RHI_RHI_HPP__
@@ -63,6 +72,7 @@ enum class UniformFormat
 enum class PixelFormat
+	kRG8,
@@ -71,8 +81,10 @@ enum class PixelFormat
 enum class TextureFormat
+	kLuminanceAlpha,
+	kRGBA,
+	kDepth
 enum class CompareFunc
@@ -152,7 +164,8 @@ enum class AttachmentStoreOp
 enum class PipelineProgram
-	kUnshadedPaletted
+	kUnshadedPaletted,
+	kPostprocessWipe
 enum class BufferType
@@ -181,7 +194,8 @@ enum class UniformName
-	kTexCoord0Transform
+	kTexCoord0Transform,
+	kSampler0IsIndexedAlpha
 enum class SamplerName
@@ -237,12 +251,12 @@ struct ProgramVertexInputRequirements
 struct ProgramUniformRequirements
-	srb2::StaticVec<srb2::StaticVec<UniformName, 16>, 4> uniform_groups;
+	srb2::StaticVec<srb2::StaticVec<ProgramUniformInput, 16>, 4> uniform_groups;
 struct ProgramSamplerRequirements
-	std::array<std::optional<ProgramSamplerInput>, kMaxSamplers> samplers;
+	srb2::StaticVec<ProgramSamplerInput, kMaxSamplers> samplers;
 struct ProgramRequirements
@@ -254,6 +268,7 @@ struct ProgramRequirements
 extern const ProgramRequirements kProgramRequirementsUnshaded;
 extern const ProgramRequirements kProgramRequirementsUnshadedPaletted;
+extern const ProgramRequirements kProgramRequirementsPostprocessWipe;
 const ProgramRequirements& program_requirements_for_program(PipelineProgram program) noexcept;
@@ -288,6 +303,8 @@ inline constexpr const UniformFormat uniform_format(UniformName name) noexcept
 		return UniformFormat::kMat4;
 	case UniformName::kTexCoord0Transform:
 		return UniformFormat::kMat3;
+	case UniformName::kSampler0IsIndexedAlpha:
+		return UniformFormat::kInt;
 		return UniformFormat::kFloat;
@@ -309,8 +326,8 @@ struct VertexAttributeLayoutDesc
 struct VertexInputDesc
-	std::vector<VertexBufferLayoutDesc> buffer_layouts;
-	std::vector<VertexAttributeLayoutDesc> attr_layouts;
+	srb2::StaticVec<VertexBufferLayoutDesc, 4> buffer_layouts;
+	srb2::StaticVec<VertexAttributeLayoutDesc, 8> attr_layouts;
 struct UniformInputDesc
@@ -489,6 +506,9 @@ struct GraphicsContext
+/// @brief The unpack alignment of a row span when uploading pixels to the device.
+constexpr const std::size_t kPixelRowUnpackAlignment = 4;
 /// @brief An active handle to a rendering device.
 struct Rhi
diff --git a/src/sdl/i_video.cpp b/src/sdl/i_video.cpp
index bf5deb873137924bd52c290df2b62f7f4a12bcad..d86f4a4b4b018d0fa1ae6c7499ac20f0fa421d98 100644
--- a/src/sdl/i_video.cpp
+++ b/src/sdl/i_video.cpp
@@ -231,7 +231,11 @@ static void SDLSetMode(INT32 width, INT32 height, SDL_bool fullscreen, SDL_bool
 		OglSdlSurface(vid.width, vid.height);
+	else
+	{
+		SDL_GL_SetSwapInterval(cv_vidwait.value ? 1 : 0);
+	}
 	SDL_GetWindowSize(window, &width, &height);
 	vid.realwidth = static_cast<uint32_t>(width);
diff --git a/src/sdl/rhi_gl3_core_platform.cpp b/src/sdl/rhi_gl3_core_platform.cpp
index d8c0cde337f2524d0f7be7092165573ff5dbdef9..5de7eef70095a961a7893fe11720abfa33a8576e 100644
--- a/src/sdl/rhi_gl3_core_platform.cpp
+++ b/src/sdl/rhi_gl3_core_platform.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "rhi_gl3_core_platform.hpp"
 #include <SDL.h>
@@ -33,6 +42,10 @@ std::tuple<std::string, std::string> SdlGlCorePlatform::find_shader_sources(rhi:
 		vertex_lump_name = "rhi_glcore_vertex_unshadedpaletted";
 		fragment_lump_name = "rhi_glcore_fragment_unshadedpaletted";
+	case rhi::PipelineProgram::kPostprocessWipe:
+		vertex_lump_name = "rhi_glcore_vertex_postprocesswipe";
+		fragment_lump_name = "rhi_glcore_fragment_postprocesswipe";
+		break;
diff --git a/src/sdl/rhi_gl3_core_platform.hpp b/src/sdl/rhi_gl3_core_platform.hpp
index 0c0f6f4f3442f4995a0f11f57ea2b19e0b50792f..9522e4ba4b1b887496439c337f4b88855b8d19a9 100644
--- a/src/sdl/rhi_gl3_core_platform.hpp
+++ b/src/sdl/rhi_gl3_core_platform.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
diff --git a/src/sdl/rhi_gles2_platform.cpp b/src/sdl/rhi_gles2_platform.cpp
index d91a3d2bfbec231addbcae829c97fe5630a51fbd..edf5fe2016d1e01e15bcd6617282be3a32a02f4f 100644
--- a/src/sdl/rhi_gles2_platform.cpp
+++ b/src/sdl/rhi_gles2_platform.cpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
 #include "rhi_gles2_platform.hpp"
 #include <SDL.h>
diff --git a/src/sdl/rhi_gles2_platform.hpp b/src/sdl/rhi_gles2_platform.hpp
index 19970d8f19b8118aec6c6baebbff7c607b81d748..b434c9c2ea15000ba816240c3e86f03054c8949e 100644
--- a/src/sdl/rhi_gles2_platform.hpp
+++ b/src/sdl/rhi_gles2_platform.hpp
@@ -1,3 +1,12 @@
+// Copyright (C) 2023 by Ronald "Eidolon" Kinard
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
diff --git a/src/v_video.cpp b/src/v_video.cpp
index 779a3a6c60239511445a6e04e2693cb66763dd53..4e1f0d0f721fa3eac7fca141c8a0b50b479528df 100644
--- a/src/v_video.cpp
+++ b/src/v_video.cpp
@@ -41,6 +41,8 @@
 #include "k_boss.h"
 #include "i_time.h"
+using namespace srb2;
 // Each screen is [vid.width*vid.height];
 UINT8 *screens[5];
 // screens[0] = main display window
@@ -97,8 +99,12 @@ RGBA_t *pLocalPalette = NULL;
 RGBA_t *pMasterPalette = NULL;
 RGBA_t *pGammaCorrectedPalette = NULL;
+hwr2::Twodee srb2::g_2d;
 static size_t currentPaletteSize;
+static UINT8 softwaretranstohwr[11]    = {  0, 25, 51, 76,102,127,153,178,204,229,255};
 The following was an extremely helpful resource when developing my Colour Cube LUT.
@@ -650,7 +656,7 @@ void V_AdjustXYWithSnap(INT32 *x, INT32 *y, UINT32 options, INT32 dupx, INT32 du
-static cliprect_t cliprect;
+static cliprect_t cliprect = {0};
 const cliprect_t *V_GetClipRect(void)
@@ -771,16 +777,11 @@ static inline UINT8 transmappedpdraw(const UINT8 *dest, const UINT8 *source, fix
 // Draws a patch scaled to arbitrary size.
 void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vscale, INT32 scrn, patch_t *patch, const UINT8 *colormap)
-	UINT8 (*patchdrawfunc)(const UINT8*, const UINT8*, fixed_t);
 	UINT32 alphalevel, blendmode;
-	fixed_t col, ofs, colfrac, rowfrac, fdup, vdup;
+	fixed_t vdup;
 	INT32 dupx, dupy;
-	const column_t *column;
-	UINT8 *desttop, *dest, *deststart, *destend;
-	const UINT8 *source, *deststop;
 	fixed_t pwidth; // patch width
-	fixed_t offx = 0; // x offset
 	const cliprect_t *clip = V_GetClipRect();
@@ -796,8 +797,6 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
-	patchdrawfunc = standardpdraw;
 	if ((blendmode = ((scrn & V_BLENDMASK) >> V_BLENDSHIFT)))
 		blendmode++; // realign to constants
 	if ((alphalevel = ((scrn & V_ALPHAMASK) >> V_ALPHASHIFT)))
@@ -812,15 +811,6 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
 		if (alphalevel >= 10) // Still inelegible to render?
-	if ((v_translevel = R_GetBlendTable(blendmode, alphalevel)))
-		patchdrawfunc = translucentpdraw;
-	v_colormap = NULL;
-	if (colormap)
-	{
-		v_colormap = colormap;
-		patchdrawfunc = (v_translevel) ? transmappedpdraw : mappedpdraw;
-	}
 	dupx = vid.dupx;
 	dupy = vid.dupy;
@@ -843,11 +833,9 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
 	// only use one dup, to avoid stretching (har har)
 	dupx = dupy = (dupx < dupy ? dupx : dupy);
-	fdup = vdup = FixedMul(dupx<<FRACBITS, pscale);
+	vdup = FixedMul(dupx<<FRACBITS, pscale);
 	if (vscale != pscale)
 		vdup = FixedMul(dupx<<FRACBITS, vscale);
-	colfrac = FixedDiv(FRACUNIT, fdup);
-	rowfrac = FixedDiv(FRACUNIT, vdup);
 		fixed_t offsetx = 0, offsety = 0;
@@ -869,18 +857,10 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
 		y -= offsety;
-	desttop = screens[scrn&V_SCREENMASK];
-	if (!desttop)
-		return;
-	deststop = desttop + vid.rowbytes * vid.height;
 	if (scrn & V_NOSCALESTART)
 		x >>= FRACBITS;
 		y >>= FRACBITS;
-		desttop += (y*vid.width) + x;
@@ -894,8 +874,6 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
 			V_AdjustXYWithSnap(&x, &y, scrn, dupx, dupy);
-		desttop += (y*vid.width) + x;
 	if (pscale != FRACUNIT) // scale width properly
@@ -908,104 +886,73 @@ void V_DrawStretchyFixedPatch(fixed_t x, fixed_t y, fixed_t pscale, fixed_t vsca
 		pwidth = patch->width * dupx;
-	deststart = desttop;
-	destend = desttop + pwidth;
+	float fdupy = FIXED_TO_FLOAT(vdup);
-	for (col = 0; (col>>FRACBITS) < patch->width; col += colfrac, ++offx, desttop++)
-	{
-		INT32 topdelta, prevdelta = -1;
+	float fx = x;
+	float fy = y;
+	float fx2 = fx + pwidth;
+	float fy2 = fy + static_cast<float>(patch->height) * fdupy;
+	float falpha = 1.f;
+	float umin = 0.f;
+	float umax = 1.f;
+	float vmin = 0.f;
+	float vmax = 1.f;
-		if (scrn & V_FLIP) // offx is measured from right edge instead of left
-		{
-			if (x+pwidth-offx < (clip ? clip->left : 0)) // don't draw off the left of the screen (WRAP PREVENTION)
-				break;
-			if (x+pwidth-offx >= (clip ? clip->right : vid.width)) // don't draw off the right of the screen (WRAP PREVENTION)
-				continue;
-		}
-		else
-		{
-			if (x+offx < (clip ? clip->left : 0)) // don't draw off the left of the screen (WRAP PREVENTION)
-				continue;
-			if (x+offx >= (clip ? clip->right : vid.width)) // don't draw off the right of the screen (WRAP PREVENTION)
-				break;
-		}
-		column = (const column_t *)((const UINT8 *)(patch->columns) + (patch->columnofs[col>>FRACBITS]));
-		while (column->topdelta != 0xff)
-		{
-			fixed_t offy = 0;
-			topdelta = column->topdelta;
-			if (topdelta <= prevdelta)
-				topdelta += prevdelta;
-			prevdelta = topdelta;
-			source = (const UINT8 *)(column) + 3;
-			dest = desttop;
-			if (scrn & V_FLIP)
-				dest = deststart + (destend - dest);
-			topdelta = FixedInt(FixedMul(topdelta << FRACBITS, vdup));
-			dest += topdelta * vid.width;
-			if (scrn & V_VFLIP)
-			{
-				for (ofs = (column->length << FRACBITS)-1; dest < deststop && ofs >= 0; ofs -= rowfrac, ++offy)
-				{
-					if (clip != NULL)
-					{
-						const INT32 cy = y + topdelta - offy;
-						if (cy < clip->top) // don't draw off the top of the clip rect
-						{
-							dest += vid.width;
-							continue;
-						}
-						if (cy >= clip->bottom) // don't draw off the bottom of the clip rect
-						{
-							dest += vid.width;
-							continue;
-						}
-					}
-					if (dest >= screens[scrn&V_SCREENMASK]) // don't draw off the top of the screen (CRASH PREVENTION)
-						*dest = patchdrawfunc(dest, source, ofs);
+	// flip UVs
+	if (scrn & V_FLIP)
+	{
+		umin = 1.f - umin;
+		umax = 1.f - umax;
+	}
+	if (scrn & V_VFLIP)
+	{
+		vmin = 1.f - vmin;
+		vmax = 1.f - vmax;
+	}
-					dest += vid.width;
-				}
-			}
-			else
-			{
-				for (ofs = 0; dest < deststop && ofs < (column->length << FRACBITS); ofs += rowfrac, ++offy)
-				{
-					if (clip != NULL)
-					{
-						const INT32 cy = y + topdelta + offy;
-						if (cy < clip->top) // don't draw off the top of the clip rect
-						{
-							dest += vid.width;
-							continue;
-						}
-						if (cy >= clip->bottom) // don't draw off the bottom of the clip rect
-						{
-							dest += vid.width;
-							continue;
-						}
-					}
+	if (alphalevel > 0 && alphalevel <= 10)
+	{
+		falpha = (10 - alphalevel) / 10.f;
+	}
+	hwr2::Draw2dBlend blend = hwr2::Draw2dBlend::kModulate;
+	switch (blendmode)
+	{
+		blend = hwr2::Draw2dBlend::kModulate;
+		break;
+	case AST_ADD:
+		blend = hwr2::Draw2dBlend::kAdditive;
+		break;
-					if (dest >= screens[scrn&V_SCREENMASK]) // don't draw off the top of the screen (CRASH PREVENTION)
-						*dest = patchdrawfunc(dest, source, ofs);
+	// Note: SRB2 has these blend modes flipped compared to GL and Vulkan.
+	// SRB2's Subtract is Dst - Src. OpenGL is Src - Dst. And vice versa for reverse.
+	// Twodee will use the GL definitions.
+		blend = hwr2::Draw2dBlend::kReverseSubtractive;
+		break;
+		blend = hwr2::Draw2dBlend::kSubtractive;
+		break;
+	default:
+		blend = hwr2::Draw2dBlend::kModulate;
+		break;
+	}
-					dest += vid.width;
-				}
-			}
+	auto builder = g_2d.begin_quad();
+	builder
+		.patch(patch)
+		.rect(fx, fy, fx2 - fx, fy2 - fy)
+		.flip((scrn & V_FLIP) > 0)
+		.vflip((scrn & V_VFLIP) > 0)
+		.color(1, 1, 1, falpha)
+		.blend(blend)
+		.colormap(colormap);
-			column = (const column_t *)((const UINT8 *)column + column->length + 4);
-		}
+	if (clip && clip->enabled)
+	{
+		builder.clip(clip->left, clip->top, clip->right, clip->bottom);
+	builder.done();
 // Draws a patch cropped and scaled to arbitrary size.
@@ -1067,9 +1014,6 @@ void V_DrawBlock(INT32 x, INT32 y, INT32 scrn, INT32 width, INT32 height, const
 void V_DrawFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c)
-	UINT8 *dest;
-	const UINT8 *deststop;
 	if (rendermode == render_none)
@@ -1122,13 +1066,18 @@ void V_DrawFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c)
 	if (y + h > vid.height)
 		h = vid.height - y;
-	dest = screens[0] + y*vid.width + x;
-	deststop = screens[0] + vid.rowbytes * vid.height;
 	c &= 255;
-	for (;(--h >= 0) && dest < deststop; dest += vid.width)
-		memset(dest, c, w * vid.bpp);
+	// Resolve the palette index to RGB through the named channel fields instead of
+	// masking the packed rgba word, so the result does not depend on host byte order.
+	const RGBA_t color = pMasterPalette[c];
+	const float r = color.s.red / 255.f;
+	const float g = color.s.green / 255.f;
+	const float b = color.s.blue / 255.f;
+
+	// Queue a quad with no patch, tinted with the palette colour at alpha 1.
+	g_2d.begin_quad()
+		.patch(nullptr)
+		.color(r, g, b, 1.f)
+		.rect(x, y, w, h)
+		.done();
 #ifdef HWRENDER
@@ -1169,10 +1118,6 @@ static UINT32 V_GetHWConsBackColor(void)
 void V_DrawFillConsoleMap(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c)
-	UINT8 *dest;
-	const UINT8 *deststop;
-	INT32 u;
-	UINT8 *fadetable;
 	UINT32 alphalevel = 0;
 	if (rendermode == render_none)
@@ -1231,37 +1176,18 @@ void V_DrawFillConsoleMap(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c)
 	if (y + h > vid.height)
 		h = vid.height-y;
-	dest = screens[0] + y*vid.width + x;
-	deststop = screens[0] + vid.rowbytes * vid.height;
 	c &= 255;
-	// Jimita (12-04-2018)
-	if (alphalevel)
-	{
-		fadetable = R_GetTranslucencyTable(alphalevel) + (c*256);
-		for (;(--h >= 0) && dest < deststop; dest += vid.width)
-		{
-			u = 0;
-			while (u < w)
-			{
-				dest[u] = fadetable[consolebgmap[dest[u]]];
-				u++;
-			}
-		}
-	}
-	else
-	{
-		for (;(--h >= 0) && dest < deststop; dest += vid.width)
-		{
-			u = 0;
-			while (u < w)
-			{
-				dest[u] = consolebgmap[dest[u]];
-				u++;
-			}
-		}
-	}
+	UINT32 hwcolor = V_GetHWConsBackColor();
+	float r = ((hwcolor & 0xFF000000) >> 24) / 255.f;
+	float g = ((hwcolor & 0xFF0000) >> 16) / 255.f;
+	float b = ((hwcolor & 0xFF00) >> 8) / 255.f;
+	float a = 0.5f; // alphalevel is unused in GL??
+	g_2d.begin_quad()
+		.rect(x, y, w, h)
+		.blend(hwr2::Draw2dBlend::kModulate)
+		.color(r, g, b, a)
+		.done();
@@ -1273,9 +1199,7 @@ void V_DrawFillConsoleMap(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c)
 void V_DrawDiag(INT32 x, INT32 y, INT32 wh, INT32 c)
-	UINT8 *dest;
-	const UINT8 *deststop;
-	INT32 w, h, wait = 0;
+	INT32 w, h;
 	if (rendermode == render_none)
@@ -1321,7 +1245,6 @@ void V_DrawDiag(INT32 x, INT32 y, INT32 wh, INT32 c)
 		return; // zero width/height wouldn't draw anything
 	if (x + w > vid.width)
-		wait = w - (vid.width - x);
 		w = vid.width - x;
 	if (y + w > vid.height)
@@ -1330,18 +1253,23 @@ void V_DrawDiag(INT32 x, INT32 y, INT32 wh, INT32 c)
 	if (h > w)
 		h = w;
-	dest = screens[0] + y*vid.width + x;
-	deststop = screens[0] + vid.rowbytes * vid.height;
 	c &= 255;
-	for (;(--h >= 0) && dest < deststop; dest += vid.width)
-		memset(dest, c, w * vid.bpp);
-		if (wait)
-			wait--;
-		else
-			w--;
+		auto builder = g_2d.begin_verts();
+
+		// pMasterPalette entries are RGBA_t, so read the named channels. Shifting the
+		// packed word with the 0xRRGGBB00 layout (the one the console-back hwcolor
+		// values use) selects the wrong bytes for this type.
+		const RGBA_t color = pMasterPalette[c];
+		const float r = color.s.red / 255.f;
+		const float g = color.s.green / 255.f;
+		const float b = color.s.blue / 255.f;
+		const float a = 1.f;
+		builder.color(r, g, b, a);
+
+		// NOTE(review): the software loop this replaces filled rows that started w wide
+		// and narrowed toward the bottom, using the clamped w/h. These vertices use the
+		// raw wh and put the wide edge at the bottom -- confirm this is intended.
+		builder
+			.vert(x, y)
+			.vert(x + wh, y + wh)
+			.vert(x, y + wh)
+			.done();
@@ -1355,11 +1283,6 @@ void V_DrawDiag(INT32 x, INT32 y, INT32 wh, INT32 c)
 void V_DrawFadeFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c, UINT16 color, UINT8 strength)
-	UINT8 *dest;
-	const UINT8 *deststop;
-	INT32 u;
-	UINT8 *fadetable;
 	if (rendermode == render_none)
@@ -1403,23 +1326,42 @@ void V_DrawFadeFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c, UINT16 color, U
 	if (y + h > vid.height)
 		h = vid.height-y;
-	dest = screens[0] + y*vid.width + x;
-	deststop = screens[0] + vid.rowbytes * vid.height;
+	float r;
+	float g;
+	float b;
+	float a;
+	hwr2::Draw2dBlend blendmode;
-	c &= 255;
+	if (color & 0xFF00)
+	{
+		// Historical COLORMAP fade
+		// In Ring Racers this is a Mega Drive style per-channel fade (though it'd probably be cool in SRB2 too)
+		// HWR2 will implement as a rev-subtractive rect because colormaps aren't possible in hardware
+		float fstrength = std::clamp(strength / 31.f, 0.f, 1.f);
+		r = std::clamp((fstrength - (0.f / 3.f)) * 3.f, 0.f, 1.f);
+		g = std::clamp((fstrength - (1.f / 3.f)) * 3.f, 0.f, 1.f);
+		b = std::clamp((fstrength - (2.f / 3.f)) * 3.f, 0.f, 1.f);
+		a = 1;
-	fadetable = ((color & 0xFF00) // Color is not palette index?
-		? ((UINT8 *)colormaps + strength*256) // Do COLORMAP fade.
-		: ((UINT8 *)R_GetTranslucencyTable((9-strength)+1) + color*256)); // Else, do TRANSMAP** fade.
-	for (;(--h >= 0) && dest < deststop; dest += vid.width)
+		blendmode = hwr2::Draw2dBlend::kReverseSubtractive;
+	}
+	else
-		u = 0;
-		while (u < w)
-		{
-			dest[u] = fadetable[dest[u]];
-			u++;
-		}
+		// Historically TRANSMAP fade
+		// This is done by modulative (transparent) blend to the given palette color.
+		byteColor_t bc = V_GetColor(color).s;
+		r = bc.red / 255.f;
+		g = bc.green / 255.f;
+		b = bc.blue / 255.f;
+		a = softwaretranstohwr[std::clamp(static_cast<int>(strength), 0, 10)] / 255.f;
+		blendmode = hwr2::Draw2dBlend::kModulate;
+	g_2d.begin_quad()
+		.blend(blendmode)
+		.color(r, g, b, a)
+		.rect(x, y, w, h)
+		.done();
@@ -1427,11 +1369,10 @@ void V_DrawFadeFill(INT32 x, INT32 y, INT32 w, INT32 h, INT32 c, UINT16 color, U
 void V_DrawFlatFill(INT32 x, INT32 y, INT32 w, INT32 h, lumpnum_t flatnum)
-	INT32 u, v, dupx, dupy;
-	fixed_t dx, dy, xfrac, yfrac;
-	const UINT8 *src, *deststop;
-	UINT8 *flat, *dest;
-	size_t size, lflatsize, flatshift;
+	INT32 dupx;
+	INT32 dupy;
+	size_t size;
+	size_t lflatsize;
 #ifdef HWRENDER
 	if (rendermode == render_opengl)
@@ -1440,89 +1381,52 @@ void V_DrawFlatFill(INT32 x, INT32 y, INT32 w, INT32 h, lumpnum_t flatnum)
 	size = W_LumpLength(flatnum);
 	switch (size)
 		case 4194304: // 2048x2048 lump
 			lflatsize = 2048;
-			flatshift = 11;
 		case 1048576: // 1024x1024 lump
 			lflatsize = 1024;
-			flatshift = 10;
 		case 262144:// 512x512 lump
 			lflatsize = 512;
-			flatshift = 9;
 		case 65536: // 256x256 lump
 			lflatsize = 256;
-			flatshift = 8;
 		case 16384: // 128x128 lump
 			lflatsize = 128;
-			flatshift = 7;
 		case 1024: // 32x32 lump
 			lflatsize = 32;
-			flatshift = 5;
 		case 256: // 16x16 lump
 			lflatsize = 16;
-			flatshift = 4;
 		case 64: // 8x8 lump
 			lflatsize = 8;
-			flatshift = 3;
 		default: // 64x64 lump
 			lflatsize = 64;
-			flatshift = 6;
-	flat = static_cast<UINT8*>(W_CacheLumpNum(flatnum, PU_CACHE));
+	float fsize = lflatsize;
 	dupx = dupy = (vid.dupx < vid.dupy ? vid.dupx : vid.dupy);
-	dest = screens[0] + y*dupy*vid.width + x*dupx;
-	deststop = screens[0] + vid.rowbytes * vid.height;
-	// from V_DrawScaledPatch
-	if (vid.width != BASEVIDWIDTH * dupx)
-	{
-		// dupx adjustments pretend that screen width is BASEVIDWIDTH * dupx,
-		// so center this imaginary screen
-		dest += (vid.width - (BASEVIDWIDTH * dupx)) / 2;
-	}
-	if (vid.height != BASEVIDHEIGHT * dupy)
-	{
-		// same thing here
-		dest += (vid.height - (BASEVIDHEIGHT * dupy)) * vid.width / 2;
-	}
-	w *= dupx;
-	h *= dupy;
-	dx = FixedDiv(FRACUNIT, dupx<<(FRACBITS-2));
-	dy = FixedDiv(FRACUNIT, dupy<<(FRACBITS-2));
-	yfrac = 0;
-	for (v = 0; v < h; v++, dest += vid.width)
-	{
-		xfrac = 0;
-		src = flat + (((yfrac>>FRACBITS) & (lflatsize - 1)) << flatshift);
-		for (u = 0; u < w; u++)
-		{
-			if (&dest[u] > deststop)
-				return;
-			dest[u] = src[(xfrac>>FRACBITS)&(lflatsize-1)];
-			xfrac += dx;
-		}
-		yfrac += dy;
-	}
+	// Corners of the filled rectangle after scaling by dupx/dupy.
+	const INT32 left = x * dupx;
+	const INT32 top = y * dupy;
+	const INT32 right = left + w * dupx;
+	const INT32 bottom = top + h * dupy;
+
+	// Texture coordinates at the far corner: w and h divided by the flat's side length.
+	const float umax = w / fsize;
+	const float vmax = h / fsize;
+
+	// Two triangles sharing the top-left -> bottom-right diagonal.
+	g_2d.begin_verts()
+		.flat(flatnum)
+		.vert(left, top, 0, 0)
+		.vert(right, top, umax, 0)
+		.vert(right, bottom, umax, vmax)
+		.vert(left, top, 0, 0)
+		.vert(right, bottom, umax, vmax)
+		.vert(left, bottom, 0, vmax)
+		.done();
@@ -1619,21 +1523,42 @@ void V_DrawFadeScreen(UINT16 color, UINT8 strength)
+	float r;
+	float g;
+	float b;
+	float a;
+	hwr2::Draw2dBlend blendmode;
+	if (color & 0xFF00)
-		const UINT8 *fadetable =
-			(color > 0xFFF0) // Grab a specific colormap palette?
-			? R_GetTranslationColormap(color | 0xFFFF0000, static_cast<skincolornum_t>(strength), GTC_CACHE)
-			: ((color & 0xFF00) // Color is not palette index?
-			? ((UINT8 *)colormaps + strength*256) // Do COLORMAP fade.
-			: ((UINT8 *)R_GetTranslucencyTable((9-strength)+1) + color*256)); // Else, do TRANSMAP** fade.
-		const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
-		UINT8 *buf = screens[0];
+		// Historical COLORMAP fade
+		// In Ring Racers this is a Mega Drive style per-channel fade (though it'd probably be cool in SRB2 too)
+		// HWR2 will implement as a rev-subtractive rect because colormaps aren't possible in hardware
+		float fstrength = std::clamp(strength / 31.f, 0.f, 1.f);
+		r = std::clamp((fstrength - (0.f / 3.f)) * 3.f, 0.f, 1.f);
+		g = std::clamp((fstrength - (1.f / 3.f)) * 3.f, 0.f, 1.f);
+		b = std::clamp((fstrength - (2.f / 3.f)) * 3.f, 0.f, 1.f);
+		a = 1;
-		// heavily simplified -- we don't need to know x or y
-		// position when we're doing a full screen fade
-		for (; buf < deststop; ++buf)
-			*buf = fadetable[*buf];
+		blendmode = hwr2::Draw2dBlend::kReverseSubtractive;
+	else
+	{
+		// Historically TRANSMAP fade
+		// This is done by modulative (transparent) blend to the given palette color.
+		byteColor_t bc = V_GetColor(color).s;
+		r = bc.red / 255.f;
+		g = bc.green / 255.f;
+		b = bc.blue / 255.f;
+		a = softwaretranstohwr[std::clamp(static_cast<int>(strength), 0, 10)] / 255.f;
+		blendmode = hwr2::Draw2dBlend::kModulate;
+	}
+	g_2d.begin_quad()
+		.blend(blendmode)
+		.color(r, g, b, a)
+		.rect(0, 0, vid.width, vid.height)
+		.done();
@@ -1643,6 +1568,8 @@ void V_DrawFadeScreen(UINT16 color, UINT8 strength)
 void V_DrawCustomFadeScreen(const char *lump, UINT8 strength)
+	(void)lump;
+	(void)strength;
 #ifdef HWRENDER
 	if (rendermode != render_soft && rendermode != render_none)
@@ -1651,57 +1578,30 @@ void V_DrawCustomFadeScreen(const char *lump, UINT8 strength)
-	{
-		lumpnum_t lumpnum = LUMPERROR;
-		lighttable_t *clm = NULL;
-		if (lump != NULL)
-			lumpnum = W_GetNumForName(lump);
-		else
-			return;
-		if (lumpnum != LUMPERROR)
-		{
-			clm = static_cast<lighttable_t*>(Z_MallocAlign(COLORMAP_SIZE, PU_STATIC, NULL, 8));
-			W_ReadLump(lumpnum, clm);
-			if (clm != NULL)
-			{
-				const UINT8 *fadetable = ((UINT8 *)clm + strength*256);
-				const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
-				UINT8 *buf = screens[0];
-				// heavily simplified -- we don't need to know x or y
-				// position when we're doing a full screen fade
-				for (; buf < deststop; ++buf)
-					*buf = fadetable[*buf];
-				Z_Free(clm);
-				clm = NULL;
-			}
-		}
-	}
+	// NOTE: This is not implementable in HWR2.
 // Simple translucency with one color, over a set number of lines starting from the top.
 void V_DrawFadeConsBack(INT32 plines)
-	UINT8 *deststop, *buf;
+	UINT32 hwcolor = V_GetHWConsBackColor();
 #ifdef HWRENDER // not win32 only 19990829 by Kin
 	if (rendermode == render_opengl)
-		UINT32 hwcolor = V_GetHWConsBackColor();
 		HWR_DrawConsoleBack(hwcolor, plines);
-	// heavily simplified -- we don't need to know x or y position,
-	// just the stop position
-	deststop = screens[0] + vid.rowbytes * std::min(plines, vid.height);
-	for (buf = screens[0]; buf < deststop; ++buf)
-		*buf = consolebgmap[*buf];
+	float r = ((hwcolor & 0xFF000000) >> 24) / 255.f;
+	float g = ((hwcolor & 0xFF0000) >> 16) / 255.f;
+	float b = ((hwcolor & 0xFF00) >> 8) / 255.f;
+	float a = 0.5f;
+	g_2d.begin_quad()
+		.rect(0, 0, vid.width, plines)
+		.blend(hwr2::Draw2dBlend::kModulate)
+		.color(r, g, b, a)
+		.done();
@@ -1718,26 +1618,16 @@ void V_EncoreInvertScreen(void)
-	{
-		const UINT8 *deststop = screens[0] + vid.rowbytes * vid.height;
-		UINT8 *buf = screens[0];
-		for (; buf < deststop; ++buf)
-		{
-			*buf = NearestColor(
-				255 - pLocalPalette[*buf].s.red,
-				255 - pLocalPalette[*buf].s.green,
-				255 - pLocalPalette[*buf].s.blue
-			);
-		}
-	}
+	g_2d.begin_quad()
+		.blend(hwr2::Draw2dBlend::kInvertDest)
+		.color(1, 1, 1, 1)
+		.rect(0, 0, vid.width, vid.height)
+		.done();
 // Very similar to F_DrawFadeConsBack, except we draw from the middle(-ish) of the screen to the bottom.
 void V_DrawPromptBack(INT32 boxheight, INT32 color)
-	UINT8 *deststop, *buf;
 	if (color >= 256 && color < 512)
 		if (boxheight < 0)
@@ -1753,50 +1643,50 @@ void V_DrawPromptBack(INT32 boxheight, INT32 color)
 	if (color == INT32_MAX)
 		color = cons_backcolor.value;
+	UINT32 hwcolor;
+	switch (color)
+	{
+		case 0:		hwcolor = 0xffffff00;	break; 	// White
+		case 1:		hwcolor = 0x00000000;	break; 	// Black // Note this is different from V_DrawFadeConsBack
+		case 2:		hwcolor = 0xdeb88700;	break;	// Sepia
+		case 3:		hwcolor = 0x40201000;	break; 	// Brown
+		case 4:		hwcolor = 0xfa807200;	break; 	// Pink
+		case 5:		hwcolor = 0xff69b400;	break; 	// Raspberry
+		case 6:		hwcolor = 0xff000000;	break; 	// Red
+		case 7:		hwcolor = 0xffd68300;	break;	// Creamsicle
+		case 8:		hwcolor = 0xff800000;	break; 	// Orange
+		case 9:		hwcolor = 0xdaa52000;	break; 	// Gold
+		case 10:	hwcolor = 0x80800000;	break; 	// Yellow
+		case 11:	hwcolor = 0x00ff0000;	break; 	// Emerald
+		case 12:	hwcolor = 0x00800000;	break; 	// Green
+		case 13:	hwcolor = 0x4080ff00;	break; 	// Cyan
+		case 14:	hwcolor = 0x4682b400;	break; 	// Steel
+		case 15:	hwcolor = 0x1e90ff00;	break;	// Periwinkle
+		case 16:	hwcolor = 0x0000ff00;	break; 	// Blue
+		case 17:	hwcolor = 0xff00ff00;	break; 	// Purple
+		case 18:	hwcolor = 0xee82ee00;	break; 	// Lavender
+		// Default green
+		default:	hwcolor = 0x00800000;	break;
+	}
 #ifdef HWRENDER
 	if (rendermode == render_opengl)
-		UINT32 hwcolor;
-		switch (color)
-		{
-			case 0:		hwcolor = 0xffffff00;	break; 	// White
-			case 1:		hwcolor = 0x00000000;	break; 	// Black // Note this is different from V_DrawFadeConsBack
-			case 2:		hwcolor = 0xdeb88700;	break;	// Sepia
-			case 3:		hwcolor = 0x40201000;	break; 	// Brown
-			case 4:		hwcolor = 0xfa807200;	break; 	// Pink
-			case 5:		hwcolor = 0xff69b400;	break; 	// Raspberry
-			case 6:		hwcolor = 0xff000000;	break; 	// Red
-			case 7:		hwcolor = 0xffd68300;	break;	// Creamsicle
-			case 8:		hwcolor = 0xff800000;	break; 	// Orange
-			case 9:		hwcolor = 0xdaa52000;	break; 	// Gold
-			case 10:	hwcolor = 0x80800000;	break; 	// Yellow
-			case 11:	hwcolor = 0x00ff0000;	break; 	// Emerald
-			case 12:	hwcolor = 0x00800000;	break; 	// Green
-			case 13:	hwcolor = 0x4080ff00;	break; 	// Cyan
-			case 14:	hwcolor = 0x4682b400;	break; 	// Steel
-			case 15:	hwcolor = 0x1e90ff00;	break;	// Periwinkle
-			case 16:	hwcolor = 0x0000ff00;	break; 	// Blue
-			case 17:	hwcolor = 0xff00ff00;	break; 	// Purple
-			case 18:	hwcolor = 0xee82ee00;	break; 	// Lavender
-			// Default green
-			default:	hwcolor = 0x00800000;	break;
-		}
 		HWR_DrawTutorialBack(hwcolor, boxheight);
-	CON_SetupBackColormapEx(color, true);
-	// heavily simplified -- we don't need to know x or y position,
-	// just the start and stop positions
-	buf = deststop = screens[0] + vid.rowbytes * vid.height;
-	if (boxheight < 0)
-		buf += vid.rowbytes * boxheight;
-	else // 4 lines of space plus gaps between and some leeway
-		buf -= vid.rowbytes * ((boxheight * 4) + (boxheight/2)*5);
-	for (; buf < deststop; ++buf)
-		*buf = promptbgmap[*buf];
+	// Unpack the channels from hwcolor, the 0xRRGGBB00 value selected by the
+	// switch above. 'color' is only the small console colour index, so
+	// extracting channels from it would always produce a (near) black quad.
+	float r = ((hwcolor & 0xFF000000) >> 24) / 255.f;
+	float g = ((hwcolor & 0xFF0000) >> 16) / 255.f;
+	float b = ((hwcolor & 0xFF00) >> 8) / 255.f;
+	// Black is hwcolor 0x00000000 (colour index 1); index 0 is white.
+	float a = (hwcolor == 0 ? 0xC0 : 0x80) / 255.f; // make black darker, like software
+
+	// A negative boxheight is an exact height in screen rows (as in the old
+	// software path); otherwise reserve 4 lines of space plus gaps between
+	// and some leeway.
+	INT32 real_boxheight = (boxheight < 0) ? -boxheight : (boxheight * 4) + (boxheight / 2) * 5;
+	// Draw the box flush with the bottom edge of the screen, full width.
+	g_2d.begin_quad()
+		.rect(0, vid.height - real_boxheight, vid.width, real_boxheight)
+		.color(r, g, b, a)
+		.done();
 // Gets string colormap, used for 0x80 color codes
diff --git a/src/v_video.h b/src/v_video.h
index 497f0e7125f208b49a9bd320b6effa14e07f3191..bda5920d9c375c08e90e04e33938bb6ee44113db 100644
--- a/src/v_video.h
+++ b/src/v_video.h
@@ -22,6 +22,14 @@
 #include "hu_stuff.h" // fonts
 #ifdef __cplusplus
+#include "hwr2/twodee.hpp"
+namespace srb2
+extern hwr2::Twodee g_2d;
+} // namespace srb2
 extern "C" {
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index 46e2b2a3d369ace2b9f76871eb623f54fe55f635..6fa5e9687980705457ffa1d815deb6d1c4b45831 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -31,4 +31,5 @@ include("cpm-libyuv.cmake")
diff --git a/thirdparty/cpm-imgui.cmake b/thirdparty/cpm-imgui.cmake
index 2afd71a2c29c5eb9381947f6976a0a0f93b92e11..788643f431d715e383c2da1053318e9042a62544 100644
--- a/thirdparty/cpm-imgui.cmake
+++ b/thirdparty/cpm-imgui.cmake
@@ -31,5 +31,6 @@ if(imgui_ADDED)
 	target_include_directories(imgui PUBLIC "${imgui_BINARY_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/imgui_config")
 	target_compile_definitions(imgui PUBLIC IMGUI_USER_CONFIG="srb2_imconfig.h")
 	target_compile_features(imgui PUBLIC cxx_std_11)
+	target_link_libraries(imgui PRIVATE stb_rect_pack)
 	add_library(imgui::imgui ALIAS imgui)
diff --git a/thirdparty/imgui_config/srb2_imconfig.h b/thirdparty/imgui_config/srb2_imconfig.h
index 5c09001b20867ab0ea0d7b93055c07623d46ba7f..48645d16ca98eeba2004ee6fd7597d44718d814b 100644
--- a/thirdparty/imgui_config/srb2_imconfig.h
+++ b/thirdparty/imgui_config/srb2_imconfig.h
@@ -5,6 +5,7 @@
 // We provide needed functionalities provided by default win32 impls through the interface layer
diff --git a/thirdparty/stb_rect_pack/CMakeLists.txt b/thirdparty/stb_rect_pack/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b0e610c95fb6b8454efb1ec52952cba321ab301f
--- /dev/null
+++ b/thirdparty/stb_rect_pack/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Update from https://github.com/nothings/stb
+add_library(stb_rect_pack STATIC stb_rect_pack.c include/stb_rect_pack.h)
+target_include_directories(stb_rect_pack PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
diff --git a/thirdparty/stb_rect_pack/include/stb_rect_pack.h b/thirdparty/stb_rect_pack/include/stb_rect_pack.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a633ce666a8600d7d974996fea072faabe939b3
--- /dev/null
+++ b/thirdparty/stb_rect_pack/include/stb_rect_pack.h
@@ -0,0 +1,623 @@
+// stb_rect_pack.h - v1.01 - public domain - rectangle packing
+// Sean Barrett 2014
+// Useful for e.g. packing rectangular textures into an atlas.
+// Does not do rotation.
+// Before #including,
+//
+//    #define STB_RECT_PACK_IMPLEMENTATION
+//
+// in the file that you want to have the implementation.
+// Not necessarily the awesomest packing method, but better than
+// the totally naive one in stb_truetype (which is primarily what
+// this is meant to replace).
+// Has only had a few tests run, may have issues.
+// More docs to come.
+// No memory allocations; uses qsort() and assert() from stdlib.
+// Can override those by defining STBRP_SORT and STBRP_ASSERT.
+// This library currently uses the Skyline Bottom-Left algorithm.
+// Please note: better rectangle packers are welcome! Please
+// implement them to the same API, but with a different init
+// function.
+// Credits
+//  Library
+//    Sean Barrett
+//  Minor features
+//    Martins Mozeiko
+//    github:IntellectualKitty
+//  Bugfixes / warning fixes
+//    Jeremy Jaussaud
+//    Fabian Giesen
+// Version history:
+//     1.01  (2021-07-11)  always use large rect mode, expose STBRP__MAXVAL in public section
+//     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
+//     0.99  (2019-02-07)  warning fixes
+//     0.11  (2017-03-03)  return packing success/fail result
+//     0.10  (2016-10-25)  remove cast-away-const to avoid warnings
+//     0.09  (2016-08-27)  fix compiler warnings
+//     0.08  (2015-09-13)  really fix bug with empty rects (w=0 or h=0)
+//     0.07  (2015-09-13)  fix bug with empty rects (w=0 or h=0)
+//     0.06  (2015-04-15)  added STBRP_SORT to allow replacing qsort
+//     0.05:  added STBRP_ASSERT to allow replacing assert
+//     0.04:  fixed minor bug in STBRP_LARGE_RECTS support
+//     0.01:  initial release
+//   See end of file for license information.
+#define STBRP_DEF static
+#define STBRP_DEF extern
+#ifdef __cplusplus
+extern "C" {
+typedef struct stbrp_context stbrp_context;
+typedef struct stbrp_node    stbrp_node;
+typedef struct stbrp_rect    stbrp_rect;
+typedef int            stbrp_coord;
+#define STBRP__MAXVAL  0x7fffffff
+// Mostly for internal use, but this is the maximum supported coordinate value.
+STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
+// Assign packed locations to rectangles. The rectangles are of type
+// 'stbrp_rect' defined below, stored in the array 'rects', and there
+// are 'num_rects' many of them.
+// Rectangles which are successfully packed have the 'was_packed' flag
+// set to a non-zero value and 'x' and 'y' store the minimum location
+// on each axis (i.e. bottom-left in cartesian coordinates, top-left
+// if you imagine y increasing downwards). Rectangles which do not fit
+// have the 'was_packed' flag set to 0.
+// You should not try to access the 'rects' array from another thread
+// while this function is running, as the function temporarily reorders
+// the array while it executes.
+// To pack into another rectangle, you need to call stbrp_init_target
+// again. To continue packing into the same rectangle, you can call
+// this function again. Calling this multiple times with multiple rect
+// arrays will probably produce worse packing results than calling it
+// a single time with the full rectangle array, but the option is
+// available.
+// The function returns 1 if all of the rectangles were successfully
+// packed and 0 otherwise.
+struct stbrp_rect
+   // reserved for your use:
+   int            id;
+   // input:
+   stbrp_coord    w, h;
+   // output:
+   stbrp_coord    x, y;
+   int            was_packed;  // non-zero if valid packing
+}; // 16 bytes, nominally
+STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes);
+// Initialize a rectangle packer to:
+//    pack a rectangle that is 'width' by 'height' in dimensions
+//    using temporary storage provided by the array 'nodes', which is 'num_nodes' long
+// You must call this function every time you start packing into a new target.
+// There is no "shutdown" function. The 'nodes' memory must stay valid for
+// the following stbrp_pack_rects() call (or calls), but can be freed after
+// the call (or calls) finish.
+// Note: to guarantee best results, either:
+//       1. make sure 'num_nodes' >= 'width'
+//   or  2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1'
+// If you don't do either of the above things, widths will be quantized to multiples
+// of small integers to guarantee the algorithm doesn't run out of temporary storage.
+// If you do #2, then the non-quantized algorithm will be used, but the algorithm
+// may run out of temporary storage and be unable to pack some rectangles.
+STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem);
+// Optionally call this function after init but before doing any packing to
+// change the handling of the out-of-temp-memory scenario, described above.
+// If you call init again, this will be reset to the default (false).
+STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic);
+// Optionally select which packing heuristic the library should use. Different
+// heuristics will produce better/worse results for different data sets.
+// If you call init again, this will be reset to the default.
+   STBRP_HEURISTIC_Skyline_default=0,
+   STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default,
+   STBRP_HEURISTIC_Skyline_BF_sortHeight
+// the details of the following structures don't matter to you, but they must
+// be visible so you can handle the memory allocations for them
+struct stbrp_node
+   stbrp_coord  x,y;
+   stbrp_node  *next;
+struct stbrp_context
+   int width;
+   int height;
+   int align;
+   int init_mode;
+   int heuristic;
+   int num_nodes;
+   stbrp_node *active_head;
+   stbrp_node *free_head;
+   stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2'
+#ifdef __cplusplus
+#ifndef STBRP_SORT
+#include <stdlib.h>
+#define STBRP_SORT qsort
+#include <assert.h>
+#define STBRP_ASSERT assert
+#ifdef _MSC_VER
+#define STBRP__NOTUSED(v)  (void)(v)
+#define STBRP__CDECL       __cdecl
+#define STBRP__NOTUSED(v)  (void)sizeof(v)
+#define STBRP__CDECL
+   STBRP__INIT_skyline = 1
+// Select the packing heuristic used by subsequent packing calls.
+//   context   - packer state; its init_mode decides which heuristics are legal
+//   heuristic - one of the STBRP_HEURISTIC_Skyline_* values
+// Validation is done only through STBRP_ASSERT: an unknown heuristic, or a
+// context whose init_mode is not STBRP__INIT_skyline, trips the assert.
+STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic)
+   switch (context->init_mode) {
+      case STBRP__INIT_skyline:
+         STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight);
+         context->heuristic = heuristic;
+         break;
+      default:
+         // no other init modes exist; reaching here means a corrupt/uninitialized context
+         STBRP_ASSERT(0);
+   }
+// Choose how rectangle widths are quantized, by setting context->align.
+//   allow_out_of_mem != 0 : align = 1 (exact widths; packing may run out of nodes)
+//   allow_out_of_mem == 0 : align = ceil(width / num_nodes), so the node pool
+//                           can never be exhausted
+// Reads context->width and context->num_nodes, so it must run after those
+// fields are set (stbrp_init_target calls it last for that reason).
+STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem)
+   if (allow_out_of_mem)
+      // if it's ok to run out of memory, then don't bother aligning them;
+      // this gives better packing, but may fail due to OOM (even though
+      // the rectangles easily fit). @TODO a smarter approach would be to only
+      // quantize once we've hit OOM, then we could get rid of this parameter.
+      context->align = 1;
+   else {
+      // if it's not ok to run out of memory, then quantize the widths
+      // so that num_nodes is always enough nodes.
+      //
+      // I.e. num_nodes * align >= width
+      //                  align >= width / num_nodes
+      //                  align = ceil(width/num_nodes)
+      // NOTE(review): divides by context->num_nodes; a context initialized
+      // with num_nodes == 0 would divide by zero here.
+      context->align = (context->width + context->num_nodes-1) / context->num_nodes;
+   }
+// Initialize 'context' to pack into a width x height target.
+//   nodes / num_nodes - caller-owned scratch array; it is threaded into the
+//                       context's free list and must outlive the packing calls
+// Resets the heuristic to the skyline default and the out-of-memory policy
+// to "not allowed" (widths quantized).
+STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
+   int i;
+   // chain the caller's nodes into a singly linked free list
+   for (i=0; i < num_nodes-1; ++i)
+      nodes[i].next = &nodes[i+1];
+   // i is now the index of the last node (or 0 when num_nodes <= 1), so this
+   // terminates the list; it writes nodes[0] even if num_nodes is 0.
+   nodes[i].next = NULL;
+   context->init_mode = STBRP__INIT_skyline;
+   context->heuristic = STBRP_HEURISTIC_Skyline_default;
+   context->free_head = &nodes[0];
+   context->active_head = &context->extra[0];
+   context->width = width;
+   context->height = height;
+   context->num_nodes = num_nodes;
+   // must come after width/num_nodes are stored: it derives 'align' from them
+   stbrp_setup_allow_out_of_mem(context, 0);
+   // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly)
+   context->extra[0].x = 0;
+   context->extra[0].y = 0;
+   context->extra[0].next = &context->extra[1];
+   context->extra[1].x = (stbrp_coord) width;
+   // sentinel height: a very large y at x == width
+   context->extra[1].y = (1<<30);
+   context->extra[1].next = NULL;
+// Find the minimum y at which a rectangle of the given width can rest if its
+// left edge is at x0, i.e. the highest skyline segment under [x0, x0+width).
+//   c      - packer context (not otherwise used in the visible body)
+//   first  - skyline node that contains x0 (first->x <= x0 < first->next->x)
+//   pwaste - out: area left empty underneath the rectangle at that position
+// Returns the resting y.
+static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
+   stbrp_node *node = first;
+   int x1 = x0 + width;
+   int min_y, visited_width, waste_area;
+   STBRP_ASSERT(first->x <= x0);
+   #if 0
+   // skip in case we're past the node
+   while (node->next->x <= x0)
+      ++node;
+   #else
+   STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency
+   #endif
+   STBRP_ASSERT(node->x <= x0);
+   min_y = 0;
+   waste_area = 0;
+   visited_width = 0;
+   // walk every skyline segment that overlaps [x0, x1)
+   while (node->x < x1) {
+      if (node->y > min_y) {
+         // raise min_y higher.
+         // we've accounted for all waste up to min_y,
+         // but we'll now add more waste for everything we've visited
+         waste_area += visited_width * (node->y - min_y);
+         min_y = node->y;
+         // the first time through, visited_width might be reduced
+         if (node->x < x0)
+            visited_width += node->next->x - x0;
+         else
+            visited_width += node->next->x - node->x;
+      } else {
+         // add waste area
+         int under_width = node->next->x - node->x;
+         // clip the last segment so we never count past the rectangle's right edge
+         if (under_width + visited_width > width)
+            under_width = width - visited_width;
+         waste_area += under_width * (min_y - node->y);
+         visited_width += under_width;
+      }
+      node = node->next;
+   }
+   *pwaste = waste_area;
+   return min_y;
+// Result of a placement search.
+//   x, y      - chosen position of the rectangle
+//   prev_link - address of the 'next' pointer (or active_head) that leads to
+//               the skyline node at the chosen position; NULL means no
+//               placement was found
+typedef struct
+   int x,y;
+   stbrp_node **prev_link;
+} stbrp__findresult;
+// Search the active skyline for the best position for a width x height
+// rectangle according to c->heuristic.
+// The width is first rounded up to a multiple of c->align.
+// Returns prev_link == NULL when the rectangle is larger than the target or
+// no candidate was recorded; otherwise x/y hold the position.
+// Note: the bottom-left branch does not test y + height against c->height,
+// so the returned position may not fit vertically.
+static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height)
+   int best_waste = (1<<30), best_x, best_y = (1 << 30);
+   stbrp__findresult fr;
+   stbrp_node **prev, *node, *tail, **best = NULL;
+   // align to multiple of c->align
+   width = (width + c->align - 1);
+   width -= width % c->align;
+   STBRP_ASSERT(width % c->align == 0);
+   // if it can't possibly fit, bail immediately
+   if (width > c->width || height > c->height) {
+      fr.prev_link = NULL;
+      fr.x = fr.y = 0;
+      return fr;
+   }
+   node = c->active_head;
+   prev = &c->active_head;
+   // first pass: try left-aligning the rectangle with every skyline node
+   while (node->x + width <= c->width) {
+      int y,waste;
+      y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste);
+      if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
+         // bottom left
+         if (y < best_y) {
+            best_y = y;
+            best = prev;
+         }
+      } else {
+         // best-fit
+         if (y + height <= c->height) {
+            // can only use it if it fits vertically
+            if (y < best_y || (y == best_y && waste < best_waste)) {
+               best_y = y;
+               best_waste = waste;
+               best = prev;
+            }
+         }
+      }
+      prev = &node->next;
+      node = node->next;
+   }
+   best_x = (best == NULL) ? 0 : (*best)->x;
+   // if doing best-fit (BF), we also have to try aligning right edge to each node position
+   //
+   // e.g, if fitting
+   //
+   //     ____________________
+   //    |____________________|
+   //
+   //            into
+   //
+   //   |                         |
+   //   |             ____________|
+   //   |____________|
+   //
+   // then right-aligned reduces waste, but bottom-left BL always chooses left-aligned
+   //
+   // This makes BF take about 2x the time
+   if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
+      tail = c->active_head;
+      node = c->active_head;
+      prev = &c->active_head;
+      // find first node that's admissible
+      while (tail->x < width)
+         tail = tail->next;
+      // second pass: 'tail' marks the right edge, 'node'/'prev' trail behind
+      // to track the skyline node containing the matching left edge
+      while (tail) {
+         int xpos = tail->x - width;
+         int y,waste;
+         STBRP_ASSERT(xpos >= 0);
+         // find the left position that matches this
+         while (node->next->x <= xpos) {
+            prev = &node->next;
+            node = node->next;
+         }
+         STBRP_ASSERT(node->next->x > xpos && node->x <= xpos);
+         y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste);
+         if (y + height <= c->height) {
+            if (y <= best_y) {
+               // prefer lower y, then less waste, then the leftmost x
+               if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) {
+                  best_x = xpos;
+                  STBRP_ASSERT(y <= best_y);
+                  best_y = y;
+                  best_waste = waste;
+                  best = prev;
+               }
+            }
+         }
+         tail = tail->next;
+      }
+   }
+   fr.prev_link = best;
+   fr.x = best_x;
+   fr.y = best_y;
+   return fr;
+// Place one width x height rectangle and update the skyline to include it.
+// Takes a node from the free list for the rectangle's top edge, splices it
+// into the active list, and returns every node it fully covers to the free
+// list.
+// Returns the chosen position; prev_link is NULL if the rectangle could not
+// be placed (no position, does not fit vertically, or no free node left).
+static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
+   // find best position according to heuristic
+   stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height);
+   stbrp_node *node, *cur;
+   // bail if:
+   //    1. it failed
+   //    2. the best node doesn't fit (we don't always check this)
+   //    3. we're out of memory
+   if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) {
+      res.prev_link = NULL;
+      return res;
+   }
+   // on success, create new node
+   node = context->free_head;
+   node->x = (stbrp_coord) res.x;
+   node->y = (stbrp_coord) (res.y + height);
+   context->free_head = node->next;
+   // insert the new node into the right starting point, and
+   // let 'cur' point to the remaining nodes needing to be
+   // stitched back in
+   cur = *res.prev_link;
+   if (cur->x < res.x) {
+      // preserve the existing one, so start testing with the next one
+      stbrp_node *next = cur->next;
+      cur->next = node;
+      cur = next;
+   } else {
+      *res.prev_link = node;
+   }
+   // from here, traverse cur and free the nodes, until we get to one
+   // that shouldn't be freed
+   while (cur->next && cur->next->x <= res.x + width) {
+      stbrp_node *next = cur->next;
+      // move the current node to the free list
+      cur->next = context->free_head;
+      context->free_head = cur;
+      cur = next;
+   }
+   // stitch the list back in
+   node->next = cur;
+   // a partially covered node is trimmed to start at the rectangle's right edge
+   if (cur->x < res.x + width)
+      cur->x = (stbrp_coord) (res.x + width);
+#ifdef _DEBUG
+   // debug-only consistency checks: x strictly increases along the active
+   // list, and active + free lists together hold every node
+   cur = context->active_head;
+   while (cur->x < context->width) {
+      STBRP_ASSERT(cur->x < cur->next->x);
+      cur = cur->next;
+   }
+   STBRP_ASSERT(cur->next == NULL);
+   {
+      int count=0;
+      cur = context->active_head;
+      while (cur) {
+         cur = cur->next;
+         ++count;
+      }
+      cur = context->free_head;
+      while (cur) {
+         cur = cur->next;
+         ++count;
+      }
+      STBRP_ASSERT(count == context->num_nodes+2);
+   }
+   return res;
+// Sort comparator for stbrp_rect: tallest first, ties broken by widest first.
+// Returns -1 / 0 / 1 in the convention expected by STBRP_SORT (qsort by default).
+static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
+   const stbrp_rect *p = (const stbrp_rect *) a;
+   const stbrp_rect *q = (const stbrp_rect *) b;
+   if (p->h > q->h)
+      return -1;
+   if (p->h < q->h)
+      return  1;
+   // equal heights: wider sorts earlier; (p->w < q->w) yields 1 or 0
+   return (p->w > q->w) ? -1 : (p->w < q->w);
+// Sort comparator that orders rects by ascending 'was_packed'.
+// stbrp_pack_rects stores each rect's original index in that field before
+// sorting, so this restores the caller's order.
+static int STBRP__CDECL rect_original_order(const void *a, const void *b)
+   const stbrp_rect *p = (const stbrp_rect *) a;
+   const stbrp_rect *q = (const stbrp_rect *) b;
+   return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
+// Pack 'num_rects' rectangles from 'rects' into the target described by 'context'.
+// On return each rect has x/y filled in and was_packed set to non-zero on
+// success or 0 on failure. The array is reordered while the function runs
+// and restored to its original order before returning.
+// Empty rects (w == 0 or h == 0) are given position 0,0 and count as packed.
+// Returns 1 if every rectangle was packed, 0 otherwise.
+STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
+   int i, all_rects_packed = 1;
+   // we use the 'was_packed' field internally to allow sorting/unsorting
+   for (i=0; i < num_rects; ++i) {
+      rects[i].was_packed = i;
+   }
+   // sort according to heuristic
+   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare);
+   for (i=0; i < num_rects; ++i) {
+      if (rects[i].w == 0 || rects[i].h == 0) {
+         rects[i].x = rects[i].y = 0;  // empty rect needs no space
+      } else {
+         stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h);
+         if (fr.prev_link) {
+            rects[i].x = (stbrp_coord) fr.x;
+            rects[i].y = (stbrp_coord) fr.y;
+         } else {
+            // mark failure with a sentinel position; decoded into was_packed below
+            rects[i].x = rects[i].y = STBRP__MAXVAL;
+         }
+      }
+   }
+   // unsort
+   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order);
+   // set was_packed flags and all_rects_packed status
+   for (i=0; i < num_rects; ++i) {
+      rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL);
+      if (!rects[i].was_packed)
+         all_rects_packed = 0;
+   }
+   // return the all_rects_packed status
+   return all_rects_packed;
+This software is available under 2 licenses -- choose whichever you prefer.
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
diff --git a/thirdparty/stb_rect_pack/stb_rect_pack.c b/thirdparty/stb_rect_pack/stb_rect_pack.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f3391d6f9b1fb8e31ff1151d9c07de9a8485da9
--- /dev/null
+++ b/thirdparty/stb_rect_pack/stb_rect_pack.c
@@ -0,0 +1,2 @@
+#include "include/stb_rect_pack.h"