#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdbool.h>
#include <sys/mman.h>
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

#include <elf.h>
#include <libdis.h>

#include "rewrite.h"

// Brute-force symbol lookup: run a readelf/grep/awk shell pipeline through
// popen() and parse the first line it prints as a hexadecimal number.
//
//   filename    path of the ELF file handed to `readelf --syms`
//   symbolname  symbol to look for; grep matches it as a whole word
//
// Returns the value parsed from the first line of pipeline output. The
// pipeline prints only the second column of each matching readelf line and
// drops any value containing "00000000".
//
// Every failure (command too long for the buffer, popen failure, no output,
// unparsable output) trips an assert.
//
// NOTE: filename and symbolname are pasted into the shell command without
// escaping, so a single quote in either one breaks the quoting. Only call
// this with trusted arguments.
uint32_t symbol_lookup(char *filename, char *symbolname)
{
	char cmd[800];
	int cmd_len = snprintf(cmd, sizeof(cmd), "readelf --syms '%s' | grep '\\<%s\\>' | awk '{print $2}' | grep -v '00000000'", filename, symbolname);
	// snprintf returns the length it wanted to write; refuse to run a
	// command that was cut short to fit the buffer.
	assert(cmd_len >= 0 && (size_t)cmd_len < sizeof(cmd));

	FILE *result_pipe = popen(cmd, "r");
	assert(result_pipe != NULL);

	// Only the first line of output is used; fgets copies it into
	// result_buf, so it stays valid after the pipe is closed.
	char result_buf[800];
	char *result = fgets(result_buf, sizeof(result_buf), result_pipe);
	pclose(result_pipe);
	assert(result!=NULL);

	// %x stores through an unsigned int pointer, so scan into exactly that
	// type and convert afterwards.
	unsigned int parsed = 0;
	int rc = sscanf(result, "%08x", &parsed);
	assert(rc==1);
	return (uint32_t)parsed;
}

// Map the input ELF file into memory as a private, writable mapping so it
// can be patched in place without modifying the file on disk.
//
//   rw      rewriter state to fill in; infile, size, image and ehdr are set
//   infile  path of the ELF file; the pointer is stored, not copied
//
// Every failure (cannot open, stat or map the file, file too small to hold
// an ELF32 header, wrong ELF magic) trips an assert.
void rewriter_open(Rewriter *rw, char *infile)
{
	rw->infile = infile;

	int fd = open(infile, O_RDONLY);
	assert(fd >= 0);

	// fstat the descriptor we just opened instead of stat()ing the path
	// separately, so the size and the mapping describe the same file.
	struct stat statbuf;
	int rc = fstat(fd, &statbuf);
	assert(rc==0);
	// The start of the image is read as an Elf32_Ehdr, so it has to fit.
	assert(statbuf.st_size >= (off_t)sizeof(Elf32_Ehdr));
	rw->size = statbuf.st_size;

	// MAP_PRIVATE: writes land in this process's copy of the pages only.
	void *mapping = mmap(NULL, rw->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	// mmap signals failure with MAP_FAILED, not NULL.
	assert(mapping != MAP_FAILED);
	rw->image = mapping;

	// The mapping remains valid after the descriptor is closed.
	rc = close(fd);
	assert(rc==0);

	uint8_t *magic = (uint8_t*)rw->image;
	assert(magic[EI_MAG0]==ELFMAG0);
	assert(magic[EI_MAG1]==ELFMAG1);
	assert(magic[EI_MAG2]==ELFMAG2);
	assert(magic[EI_MAG3]==ELFMAG3);

	rw->ehdr = (Elf32_Ehdr *) rw->image;
}

// Translate a virtual address into a pointer inside the mapped file image.
//
// Walks the program headers in order and uses the first one whose
// file-backed range [p_vaddr, p_vaddr + p_filesz) contains vaddr. Each
// candidate range is printed to stderr as a debugging aid.
//
//   rw     rewriter whose image and ehdr fields are already set
//   vaddr  virtual address to translate
//
// Returns rw->image plus the file offset that corresponds to vaddr.
// Aborts if no program header covers the address.
void *rewriter_virtual_to_mapped(Rewriter *rw, uint32_t vaddr)
{
	Elf32_Phdr *phdrs = (Elf32_Phdr *) (rw->image + rw->ehdr->e_phoff);
	for (int pi=0; pi<rw->ehdr->e_phnum; pi++)
	{
		Elf32_Phdr *phdr = &phdrs[pi];
		fprintf(stderr, "%08x <? %08x <? %08x\n",
			phdr->p_vaddr, vaddr, phdr->p_vaddr+phdr->p_filesz);
		// The end of the range is exclusive: the byte at
		// p_vaddr + p_filesz is not part of this header's file data.
		// Comparing the distance from p_vaddr, rather than computing
		// p_vaddr + p_filesz, also sidesteps 32-bit wraparound.
		if (vaddr >= phdr->p_vaddr && vaddr - phdr->p_vaddr < phdr->p_filesz)
		{
			uint32_t offset = vaddr - phdr->p_vaddr + phdr->p_offset;
			return rw->image + offset;
		}
	}

	// No program header covers vaddr.
	assert(false);
	// Still fail hard, instead of falling off the end of a non-void
	// function, when asserts are compiled out with NDEBUG.
	abort();
}

// Figure out where in __write the software interrupt (expected to be
// int 0x80) begins, and return the offset of the instruction just before
// it, since we need enough room to write a longer sequence.
// (For expediency, we hardcode our secret knowledge that the prior
// instruction is a 5-byte mov.)
//
//   rw          not used by the current implementation
//   write_func  pointer to the first byte of the function to scan
//
// Disassembles up to 200 bytes starting at write_func, printing each
// instruction to stderr. Returns the offset, relative to write_func, of the
// 7-byte region (5-byte instruction + 2-byte interrupt) to be patched.
// Asserts if an instruction cannot be decoded, if the instruction sizes are
// not the expected 5 and 2, or if no interrupt instruction is found.
uint32_t rewriter_scan_write(Rewriter *rw, void *write_func)
{
	int len = 200;
		// symbol table would actually tell us len of fcn,
		// but we were lazy and only looked up its entry point.
	int pos = 0;
	int last_insn_size=0;
	bool found_int = false;
	while (pos < len)
	{
		x86_insn_t insn;
		int insn_size = x86_disasm(write_func, len, 0, pos, &insn);
		assert(insn_size>0);

		// Display the instruction for debugging
		char buf[500];
		x86_format_insn(&insn, buf, sizeof(buf), att_syntax);
		fprintf(stderr, "%3d %s\n", pos, buf);

		if (insn.type == insn_int)
		{
			// For this one-off hack, we expect to see
			// a 5-byte mov
			assert(last_insn_size == 5);
			// followed by a 2-byte int 0x80
			assert(insn_size == 2);
			found_int = true;
			break;
		}

		pos += insn_size;
		last_insn_size = insn_size;
	}

	// Running off the end of the scan window without finding the
	// interrupt would otherwise hand the caller a meaningless offset.
	assert(found_int);

	// pos is the offset of the interrupt instruction; step back over the
	// 5-byte instruction in front of it to get the start of the 7-byte
	// spot we're going to patch.
	return pos - 5;
}

// Stash away the collaterally-smashed instruction, and overwrite the 7-byte
// chunk of space with our own call to the empty_space we reserved.
//
//   rw                 rewriter holding the mapped image; its
//                      saved_mov_instruction field receives the 5 bytes
//                      about to be overwritten
//   write_vaddr        virtual address of the function being patched
//   empty_space_vaddr  virtual address the new call should target
//
// After this runs the patch site holds: e8 <rel32> 90 90
// (a 5-byte call followed by two 1-byte nops).
void rewriter_patch_write(Rewriter *rw, uint32_t write_vaddr, uint32_t empty_space_vaddr)
{
	// Work through a byte pointer: the patch site has no particular
	// alignment, and arithmetic on void* is not standard C.
	uint8_t *write_mapped = rewriter_virtual_to_mapped(rw, write_vaddr);
	uint32_t pos = rewriter_scan_write(rw, write_mapped);
	uint8_t *patch_site = write_mapped + pos;

	// stash away the move instruction we're about to splatter;
	// we'll need that in a bit!
	memcpy(rw->saved_mov_instruction, patch_site, 5);

	// The call's displacement is measured from the address of the
	// instruction that follows the 5-byte call.
	uint32_t call_rel = empty_space_vaddr - (write_vaddr + pos + 5);

	patch_site[0] = 0xe8;	// call
	// memcpy instead of a cast uint32_t* store, which would be an
	// unaligned access. The bytes are copied in host byte order, which
	// matches what x86 expects only on a little-endian host.
	memcpy(patch_site + 1, &call_rel, sizeof(call_rel));
	patch_site[5] = 0x90;	// nop
	patch_site[6] = 0x90;	// nop
}

// Use some spare space to emit the machine code that stands in for the
// parts we patched over: first the splattered instruction, then code that
// calls our replacement for the int 0x80.
//
//   rw                 rewriter holding the mapped image and the 5 bytes
//                      saved in saved_mov_instruction
//   empty_space_vaddr  virtual address of the spare area to fill
//   target_vaddr       virtual address of the function the trampoline calls
//
// Writes 17 bytes at the spare area:
//   +0   saved 5-byte instruction
//   +5   push %edx / push %ecx / push %ebx
//   +8   call target_vaddr
//   +13  pop %ebx / pop %ecx / pop %edx
//   +16  ret
void rewriter_create_trampoline(Rewriter *rw, uint32_t empty_space_vaddr, uint32_t target_vaddr)
{
	// Byte pointer: the emitted fields are not aligned, and arithmetic on
	// void* is not standard C.
	uint8_t *trampoline_site = rewriter_virtual_to_mapped(rw, empty_space_vaddr);

	// put back the instruction we had to overwrite
	memcpy(trampoline_site, rw->saved_mov_instruction, 5);

	// The call occupies bytes 8..12, so its displacement is measured from
	// the address of the byte at offset 13, right after the call.
	uint32_t call_rel = target_vaddr - (empty_space_vaddr + 13);

	trampoline_site[5] = 0x52;	// push %edx
	trampoline_site[6] = 0x51;	// push %ecx
	trampoline_site[7] = 0x53;	// push %ebx
	trampoline_site[8] = 0xe8;	// call
	// memcpy instead of a cast uint32_t* store, which would be an
	// unaligned access. The bytes are copied in host byte order, which
	// matches what x86 expects only on a little-endian host.
	memcpy(trampoline_site + 9, &call_rel, sizeof(call_rel));
	trampoline_site[13] = 0x5b;	// pop %ebx
	trampoline_site[14] = 0x59;	// pop %ecx
	trampoline_site[15] = 0x5a;	// pop %edx
	trampoline_site[16] = 0xc3;	// ret
}

// Apply the complete patch to the mapped image.
//
// Looks up the addresses of "__write", "empty_space" and "writeizzle" in
// the input file, redirects __write into the spare area, then fills the
// spare area with a trampoline that calls writeizzle.
//
//   rw  rewriter previously set up by rewriter_open()
void rewriter_edit(Rewriter *rw)
{
	char *binary = rw->infile;

	// Step 1: patch __write. This must come first, because patching is
	// what records the overwritten instruction that the trampoline
	// replays.
	uint32_t write_addr = symbol_lookup(binary, "__write");
	uint32_t spare_addr = symbol_lookup(binary, "empty_space");
	rewriter_patch_write(rw, write_addr, spare_addr);

	// Step 2: build the trampoline in the spare area.
	uint32_t hook_addr = symbol_lookup(binary, "writeizzle");
	rewriter_create_trampoline(rw, spare_addr, hook_addr);
}

// Write the (patched) in-memory image out to a new file and mark it
// executable.
//
//   rw       rewriter whose image and size describe the bytes to write
//   outfile  path of the file to create or truncate
//
// Failing to open, fully write, or close the output file trips an assert.
// Failing to set the 0755 permission bits is reported on stderr but is not
// fatal, since the file contents have been written by then.
void rewriter_emit(Rewriter *rw, char *outfile)
{
	// "b": the image is raw binary data, not text.
	FILE *fp = fopen(outfile, "wb");
	assert(fp != NULL);

	// Written as a single item of rw->size bytes, so fwrite returns 1 on
	// success.
	size_t written = fwrite(rw->image, rw->size, 1, fp);
	assert(written==1);

	// fclose flushes buffered data, so a write error can surface here.
	int rc = fclose(fp);
	assert(rc==0);

	if (chmod(outfile, 0755) != 0)
	{
		perror(outfile);
	}
}

// Entry point: rewrite <infile> into <outfile>.
//
// Maps the input ELF file, applies the patch in memory, and writes the
// result to the output path. Returns 0 on success, or EXIT_FAILURE after
// printing a usage message when the argument count is wrong.
int main(int argc, char **argv)
{
	// Validate user input explicitly; an assert would vanish under NDEBUG
	// and gives the user no hint about what went wrong.
	if (argc != 3)
	{
		fprintf(stderr, "usage: %s <infile> <outfile>\n",
			argc > 0 ? argv[0] : "rewrite");
		return EXIT_FAILURE;
	}
	char *infile = argv[1];
	char *outfile = argv[2];

	Rewriter rw;
	rewriter_open(&rw, infile);
	rewriter_edit(&rw);
	rewriter_emit(&rw, outfile);

	return 0;
}
