diff --git a/src/doomdef.h b/src/doomdef.h
index b382d0ecb4bbaa09a0da1180e481e2f979014e32..4e08b11bfb0116c2f60fffc33582d6c9a56f3b27 100644
--- a/src/doomdef.h
+++ b/src/doomdef.h
@@ -533,7 +533,7 @@ extern char liveeventbackup[256];
 #define M_GetText(x) (x)
 #endif
 void M_StartupLocale(void);
-extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
+void *M_Memcpy(void* dest, const void* src, size_t n) FUNCNONNULL; // plain memcpy wrapper: dest/src must be non-NULL and must not overlap (use memmove for that); returns dest
 char *va(const char *format, ...) FUNCPRINTF;
 char *M_GetToken(const char *inputString);
 void M_UnGetToken(void);
diff --git a/src/m_misc.c b/src/m_misc.c
index 923a188c62346d4c9d72ea1759898fa78aa867ac..ce332910dc8ccf78a8c751adfc96c28c3721aea4 100644
--- a/src/m_misc.c
+++ b/src/m_misc.c
@@ -2207,430 +2207,11 @@ char *sizeu5(size_t num)
 	return sizeu5_buf;
 }
 
-#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL
-// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699
-
-/* for small memory blocks (<256 bytes) this version is faster */
-#define small_memcpy(dest,src,n)\
-{\
-register unsigned long int dummy;\
-__asm__ __volatile__(\
-	"cld\n\t"\
-	"rep; movsb"\
-	:"=&D"(dest), "=&S"(src), "=&c"(dummy)\
-	:"0" (dest), "1" (src),"2" (n)\
-	: "memory", "cc");\
-}
-/* linux kernel __memcpy (from: /include/asm/string.h) */
-ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n)
-{
-	int d0, d1, d2;
-
-	if ( n < 4 )
-	{
-		small_memcpy(dest, src, n);
-	}
-	else
-	{
-		__asm__ __volatile__ (
-			"rep ; movsl;"
-			"testb $2,%b4;"
-			"je 1f;"
-			"movsw;"
-			"1:\ttestb $1,%b4;"
-			"je 2f;"
-			"movsb;"
-			"2:"
-		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-		:"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src)
-		: "memory");
-	}
-
-	return dest;
-}
-
-#define SSE_MMREG_SIZE 16
-#define MMX_MMREG_SIZE 8
-
-#define MMX1_MIN_LEN 0x800  /* 2K blocks */
-#define MIN_LEN 0x40  /* 64-byte blocks */
-
-/* SSE note: i tried to move 128 bytes a time instead of 64 but it
-didn't make any measureable difference. i'm using 64 for the sake of
-simplicity. [MF] */
-static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n)
-{
-	void *retval = dest;
-	size_t i;
-
-	/* PREFETCH has effect even for MOVSB instruction ;) */
-	__asm__ __volatile__ (
-		"prefetchnta (%0);"
-		"prefetchnta 32(%0);"
-		"prefetchnta 64(%0);"
-		"prefetchnta 96(%0);"
-		"prefetchnta 128(%0);"
-		"prefetchnta 160(%0);"
-		"prefetchnta 192(%0);"
-		"prefetchnta 224(%0);"
-		"prefetchnta 256(%0);"
-		"prefetchnta 288(%0);"
-		: : "r" (src) );
-
-	if (n >= MIN_LEN)
-	{
-		register unsigned long int delta;
-		/* Align destinition to MMREG_SIZE -boundary */
-		delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
-		if (delta)
-		{
-			delta=SSE_MMREG_SIZE-delta;
-			n -= delta;
-			small_memcpy(dest, src, delta);
-		}
-		i = n >> 6; /* n/64 */
-		n&=63;
-		if (((unsigned long)src) & 15)
-		/* if SRC is misaligned */
-		 for (; i>0; i--)
-		 {
-			__asm__ __volatile__ (
-				"prefetchnta 320(%0);"
-				"prefetchnta 352(%0);"
-				"movups (%0), %%xmm0;"
-				"movups 16(%0), %%xmm1;"
-				"movups 32(%0), %%xmm2;"
-				"movups 48(%0), %%xmm3;"
-				"movntps %%xmm0, (%1);"
-				"movntps %%xmm1, 16(%1);"
-				"movntps %%xmm2, 32(%1);"
-				"movntps %%xmm3, 48(%1);"
-			:: "r" (src), "r" (dest) : "memory");
-			src = (const unsigned char *)src + 64;
-			dest = (unsigned char *)dest + 64;
-		}
-		else
-			/*
-			   Only if SRC is aligned on 16-byte boundary.
-			   It allows to use movaps instead of movups, which required data
-			   to be aligned or a general-protection exception (#GP) is generated.
-			*/
-		 for (; i>0; i--)
-		 {
-			__asm__ __volatile__ (
-				"prefetchnta 320(%0);"
-				"prefetchnta 352(%0);"
-				"movaps (%0), %%xmm0;"
-				"movaps 16(%0), %%xmm1;"
-				"movaps 32(%0), %%xmm2;"
-				"movaps 48(%0), %%xmm3;"
-				"movntps %%xmm0, (%1);"
-				"movntps %%xmm1, 16(%1);"
-				"movntps %%xmm2, 32(%1);"
-				"movntps %%xmm3, 48(%1);"
-			:: "r" (src), "r" (dest) : "memory");
-			src = ((const unsigned char *)src) + 64;
-			dest = ((unsigned char *)dest) + 64;
-		}
-		/* since movntq is weakly-ordered, a "sfence"
-		 * is needed to become ordered again. */
-		__asm__ __volatile__ ("sfence":::"memory");
-		/* enables to use FPU */
-		__asm__ __volatile__ ("emms":::"memory");
-	}
-	/*
-	 *	Now do the tail of the block
-	 */
-	if (n) __memcpy(dest, src, n);
-	return retval;
-}
-
-static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
-{
-	void *retval = dest;
-	size_t i;
-
-	/* PREFETCH has effect even for MOVSB instruction ;) */
-	__asm__ __volatile__ (
-		"prefetchnta (%0);"
-		"prefetchnta 32(%0);"
-		"prefetchnta 64(%0);"
-		"prefetchnta 96(%0);"
-		"prefetchnta 128(%0);"
-		"prefetchnta 160(%0);"
-		"prefetchnta 192(%0);"
-		"prefetchnta 224(%0);"
-		"prefetchnta 256(%0);"
-		"prefetchnta 288(%0);"
-	: : "r" (src));
-
-	if (n >= MIN_LEN)
-	{
-		register unsigned long int delta;
-		/* Align destinition to MMREG_SIZE -boundary */
-		delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
-		if (delta)
-		{
-			delta=MMX_MMREG_SIZE-delta;
-			n -= delta;
-			small_memcpy(dest, src, delta);
-		}
-		i = n >> 6; /* n/64 */
-		n&=63;
-		for (; i>0; i--)
-		{
-			__asm__ __volatile__ (
-				"prefetchnta 320(%0);"
-				"prefetchnta 352(%0);"
-				"movq (%0), %%mm0;"
-				"movq 8(%0), %%mm1;"
-				"movq 16(%0), %%mm2;"
-				"movq 24(%0), %%mm3;"
-				"movq 32(%0), %%mm4;"
-				"movq 40(%0), %%mm5;"
-				"movq 48(%0), %%mm6;"
-				"movq 56(%0), %%mm7;"
-				"movntq %%mm0, (%1);"
-				"movntq %%mm1, 8(%1);"
-				"movntq %%mm2, 16(%1);"
-				"movntq %%mm3, 24(%1);"
-				"movntq %%mm4, 32(%1);"
-				"movntq %%mm5, 40(%1);"
-				"movntq %%mm6, 48(%1);"
-				"movntq %%mm7, 56(%1);"
-			:: "r" (src), "r" (dest) : "memory");
-			src = ((const unsigned char *)src) + 64;
-			dest = ((unsigned char *)dest) + 64;
-		}
-		/* since movntq is weakly-ordered, a "sfence"
-		* is needed to become ordered again. */
-		__asm__ __volatile__ ("sfence":::"memory");
-		__asm__ __volatile__ ("emms":::"memory");
-	}
-	/*
-	 *	Now do the tail of the block
-	 */
-	if (n) __memcpy(dest, src, n);
-	return retval;
-}
-
-static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
-{
-	void *retval = dest;
-	size_t i;
-
-	/* PREFETCH has effect even for MOVSB instruction ;) */
-	__asm__ __volatile__ (
-		"prefetch (%0);"
-		"prefetch 32(%0);"
-		"prefetch 64(%0);"
-		"prefetch 96(%0);"
-		"prefetch 128(%0);"
-		"prefetch 160(%0);"
-		"prefetch 192(%0);"
-		"prefetch 224(%0);"
-		"prefetch 256(%0);"
-		"prefetch 288(%0);"
-	: : "r" (src));
-
-	if (n >= MMX1_MIN_LEN)
-	{
-		register unsigned long int delta;
-		/* Align destinition to MMREG_SIZE -boundary */
-		delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
-		if (delta)
-		{
-			delta=MMX_MMREG_SIZE-delta;
-			n -= delta;
-			small_memcpy(dest, src, delta);
-		}
-		i = n >> 6; /* n/64 */
-		n&=63;
-		for (; i>0; i--)
-		{
-			__asm__ __volatile__ (
-				"prefetch 320(%0);"
-				"prefetch 352(%0);"
-				"movq (%0), %%mm0;"
-				"movq 8(%0), %%mm1;"
-				"movq 16(%0), %%mm2;"
-				"movq 24(%0), %%mm3;"
-				"movq 32(%0), %%mm4;"
-				"movq 40(%0), %%mm5;"
-				"movq 48(%0), %%mm6;"
-				"movq 56(%0), %%mm7;"
-				"movq %%mm0, (%1);"
-				"movq %%mm1, 8(%1);"
-				"movq %%mm2, 16(%1);"
-				"movq %%mm3, 24(%1);"
-				"movq %%mm4, 32(%1);"
-				"movq %%mm5, 40(%1);"
-				"movq %%mm6, 48(%1);"
-				"movq %%mm7, 56(%1);"
-			:: "r" (src), "r" (dest) : "memory");
-			src = ((const unsigned char *)src) + 64;
-			dest = ((unsigned char *)dest) + 64;
-		}
-		__asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
-	}
-	/*
-	 *	Now do the tail of the block
-	 */
-	if (n) __memcpy(dest, src, n);
-	return retval;
-}
-#endif
-
-// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type
-static void *cpu_cpy(void *dest, const void *src, size_t n)
+void *M_Memcpy(void *dest, const void *src, size_t n)
 {
-	if (src == NULL)
-	{
-		CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
-		return dest;
-	}
-
-	if(dest == NULL)
-	{
-		CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
-		return dest;
-	}
-
 	return memcpy(dest, src, n);
 }
 
-static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
-{
-#if defined (_MSC_VER) && defined (_X86_)
-	_asm
-	{
-		mov ecx, [n]
-		mov esi, [src]
-		mov edi, [dest]
-		shr ecx, 6 // mit mmx: 64bytes per iteration
-		jz lower_64 // if lower than 64 bytes
-		loop_64: // MMX transfers multiples of 64bytes
-		movq mm0,  0[ESI] // read sources
-		movq mm1,  8[ESI]
-		movq mm2, 16[ESI]
-		movq mm3, 24[ESI]
-		movq mm4, 32[ESI]
-		movq mm5, 40[ESI]
-		movq mm6, 48[ESI]
-		movq mm7, 56[ESI]
-
-		movq  0[EDI], mm0 // write destination
-		movq  8[EDI], mm1
-		movq 16[EDI], mm2
-		movq 24[EDI], mm3
-		movq 32[EDI], mm4
-		movq 40[EDI], mm5
-		movq 48[EDI], mm6
-		movq 56[EDI], mm7
-
-		add esi, 64
-		add edi, 64
-		dec ecx
-		jnz loop_64
-		emms // close mmx operation
-		lower_64:// transfer rest of buffer
-		mov ebx,esi
-		sub ebx,src
-		mov ecx,[n]
-		sub ecx,ebx
-		shr ecx, 3 // multiples of 8 bytes
-		jz lower_8
-		loop_8:
-		movq  mm0, [esi] // read source
-		movq [edi], mm0 // write destination
-		add esi, 8
-		add edi, 8
-		dec ecx
-		jnz loop_8
-		emms // close mmx operation
-		lower_8:
-		mov ebx,esi
-		sub ebx,src
-		mov ecx,[n]
-		sub ecx,ebx
-		rep movsb
-		mov eax, [dest] // return dest
-	}
-#elif defined (__GNUC__) && defined (__i386__)
-	void *retval = dest;
-	size_t i;
-
-	if (n >= MMX1_MIN_LEN)
-	{
-		register unsigned long int delta;
-		/* Align destinition to MMREG_SIZE -boundary */
-		delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
-		if (delta)
-		{
-			delta=MMX_MMREG_SIZE-delta;
-			n -= delta;
-			small_memcpy(dest, src, delta);
-		}
-		i = n >> 6; /* n/64 */
-		n&=63;
-		for (; i>0; i--)
-		{
-			__asm__ __volatile__ (
-				"movq (%0), %%mm0;"
-				"movq 8(%0), %%mm1;"
-				"movq 16(%0), %%mm2;"
-				"movq 24(%0), %%mm3;"
-				"movq 32(%0), %%mm4;"
-				"movq 40(%0), %%mm5;"
-				"movq 48(%0), %%mm6;"
-				"movq 56(%0), %%mm7;"
-				"movq %%mm0, (%1);"
-				"movq %%mm1, 8(%1);"
-				"movq %%mm2, 16(%1);"
-				"movq %%mm3, 24(%1);"
-				"movq %%mm4, 32(%1);"
-				"movq %%mm5, 40(%1);"
-				"movq %%mm6, 48(%1);"
-				"movq %%mm7, 56(%1);"
-			:: "r" (src), "r" (dest) : "memory");
-			src = ((const unsigned char *)src) + 64;
-			dest = ((unsigned char *)dest) + 64;
-		}
-		__asm__ __volatile__ ("emms":::"memory");
-	}
-	/*
-	 *	Now do the tail of the block
-	 */
-	if (n) __memcpy(dest, src, n);
-	return retval;
-#else
-	return cpu_cpy(dest, src, n);
-#endif
-}
-
-void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
-
-/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
-  * Do not use on overlapped memory, use memmove for that
-  */
-void M_SetupMemcpy(void)
-{
-#if defined (__GNUC__) && defined (__i386__)
-	if (R_SSE2)
-		M_Memcpy = sse_cpy;
-	else if (R_MMXExt)
-		M_Memcpy = mmx2_cpy;
-	else if (R_3DNow)
-		M_Memcpy = mmx1_cpy;
-	else
-#endif
-	if (R_MMX)
-		M_Memcpy = mmx_cpy;
-#if 0
-	M_Memcpy = cpu_cpy;
-#endif
-}
-
 /** Return the appropriate message for a file error or end of file.
 */
 const char *M_FileError(FILE *fp)
diff --git a/src/screen.c b/src/screen.c
index 3c50ec67eadef1d4e43441498f4ea96df0542f8d..ca59b251dce1f6f1371af433b010a18b4304e621 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -98,14 +98,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
 //  Short and Tall sky drawer, for the current color mode
 void (*walldrawerfunc)(void);
 
-boolean R_486 = false;
-boolean R_586 = false;
-boolean R_MMX = false;
-boolean R_SSE = false;
-boolean R_3DNow = false;
-boolean R_MMXExt = false;
-boolean R_SSE2 = false;
-
 void SCR_SetDrawFuncs(void)
 {
 	//
@@ -225,48 +217,6 @@ void SCR_SetMode(void)
 //
 void SCR_Startup(void)
 {
-	const CPUInfoFlags *RCpuInfo = I_CPUInfo();
-	if (!M_CheckParm("-NOCPUID") && RCpuInfo)
-	{
-#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
-		R_486 = true;
-#endif
-		if (RCpuInfo->RDTSC)
-			R_586 = true;
-		if (RCpuInfo->MMX)
-			R_MMX = true;
-		if (RCpuInfo->AMD3DNow)
-			R_3DNow = true;
-		if (RCpuInfo->MMXExt)
-			R_MMXExt = true;
-		if (RCpuInfo->SSE)
-			R_SSE = true;
-		if (RCpuInfo->SSE2)
-			R_SSE2 = true;
-		CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
-	}
-
-	if (M_CheckParm("-486"))
-		R_486 = true;
-	if (M_CheckParm("-586"))
-		R_586 = true;
-	if (M_CheckParm("-MMX"))
-		R_MMX = true;
-	if (M_CheckParm("-3DNow"))
-		R_3DNow = true;
-	if (M_CheckParm("-MMXExt"))
-		R_MMXExt = true;
-
-	if (M_CheckParm("-SSE"))
-		R_SSE = true;
-	if (M_CheckParm("-noSSE"))
-		R_SSE = false;
-
-	if (M_CheckParm("-SSE2"))
-		R_SSE2 = true;
-
-	M_SetupMemcpy();
-
 	if (dedicated)
 	{
 		V_Init();
diff --git a/src/screen.h b/src/screen.h
index 46c1b99c6ca962a18944e2e41fcd4c6d147d5327..e4c1006c35b79a7f9948f818c0a3b03edf84c558 100644
--- a/src/screen.h
+++ b/src/screen.h
@@ -172,17 +172,6 @@ extern void (*spanfunc)(void);
 extern void (*spanfuncs[SPANDRAWFUNC_MAX])(void);
 extern void (*spanfuncs_npo2[SPANDRAWFUNC_MAX])(void);
 
-// -----
-// CPUID
-// -----
-extern boolean R_ASM;
-extern boolean R_486;
-extern boolean R_586;
-extern boolean R_MMX;
-extern boolean R_3DNow;
-extern boolean R_MMXExt;
-extern boolean R_SSE2;
-
 // ----------------
 // screen variables
 // ----------------