diff --git a/python/pepy.cpp b/python/pepy.cpp index 91a82ed..e2bfdb3 100644 --- a/python/pepy.cpp +++ b/python/pepy.cpp @@ -33,6 +33,28 @@ using namespace peparse; #define PEPY_VERSION "0.2" +/* + * Add some definition for compatibility between python2 and python3 + */ +#if PY_MAJOR_VERSION >= 3 + #define PyInt_FromLong PyLong_FromLong + #define PyInt_AsLong PyLong_AsLong + #define PyString_FromString PyUnicode_FromString +#endif + +/* + * Some macro only available after python 2.6 + * Needed for compatibility with python3 + */ +#ifndef PyVarObject_HEAD_INIT + #define PyVarObject_HEAD_INIT(type, size) \ + PyObject_HEAD_INIT(type) size, +#endif + +#ifndef Py_TYPE + #define Py_TYPE(_ob_) (((PyObject*)(_ob_))->ob_type) +#endif + /* These are used to across multiple objects. */ #define PEPY_OBJECT_GET(OBJ, ATTR) \ static PyObject *pepy_##OBJ##_get_##ATTR(PyObject *self, void *closure) { \ @@ -55,11 +77,11 @@ using namespace peparse; static PyObject *pepy_error; -typedef struct { PyObject_HEAD } pepy; +struct pepy { PyObject_HEAD }; -typedef struct { PyObject_HEAD parsed_pe *pe; } pepy_parsed; +struct pepy_parsed { PyObject_HEAD parsed_pe *pe; }; -typedef struct { +struct pepy_section { PyObject_HEAD PyObject *name; PyObject *base; PyObject *length; @@ -69,9 +91,9 @@ typedef struct { PyObject *numlinenums; PyObject *characteristics; PyObject *data; -} pepy_section; +}; -typedef struct { +struct pepy_resource { PyObject_HEAD PyObject *type_str; PyObject *name_str; PyObject *lang_str; @@ -82,24 +104,24 @@ typedef struct { PyObject *RVA; PyObject *size; PyObject *data; -} pepy_resource; +}; -typedef struct { +struct pepy_import { PyObject_HEAD PyObject *name; PyObject *sym; PyObject *addr; -} pepy_import; +}; -typedef struct { +struct pepy_export { PyObject_HEAD PyObject *mod; PyObject *func; PyObject *addr; -} pepy_export; +}; -typedef struct { +struct pepy_relocation { PyObject_HEAD PyObject *type; PyObject *addr; -} pepy_relocation; +}; /* None of the attributes in these objects are writable. */ static int @@ -128,7 +150,7 @@ static void pepy_import_dealloc(pepy_import *self) { Py_XDECREF(self->name); Py_XDECREF(self->sym); Py_XDECREF(self->addr); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } PEPY_OBJECT_GET(import, name) @@ -142,7 +164,7 @@ static PyGetSetDef pepy_import_getseters[] = { {NULL}}; static PyTypeObject pepy_import_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.import", /* tp_name */ sizeof(pepy_import), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -202,7 +224,7 @@ static void pepy_export_dealloc(pepy_export *self) { Py_XDECREF(self->mod); Py_XDECREF(self->func); Py_XDECREF(self->addr); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } PEPY_OBJECT_GET(export, mod) @@ -216,7 +238,7 @@ static PyGetSetDef pepy_export_getseters[] = { {NULL}}; static PyTypeObject pepy_export_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.export", /* tp_name */ sizeof(pepy_export), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -276,7 +298,7 @@ pepy_relocation_init(pepy_relocation *self, PyObject *args, PyObject *kwds) { static void pepy_relocation_dealloc(pepy_relocation *self) { Py_XDECREF(self->type); Py_XDECREF(self->addr); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } PEPY_OBJECT_GET(relocation, type) @@ -288,7 +310,7 @@ static PyGetSetDef pepy_relocation_getseters[] = { {NULL}}; static PyTypeObject pepy_relocation_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.relocation", /* tp_name */ sizeof(pepy_relocation), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -364,7 +386,7 @@ static void pepy_section_dealloc(pepy_section *self) { Py_XDECREF(self->numlinenums); Py_XDECREF(self->characteristics); Py_XDECREF(self->data); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } PEPY_OBJECT_GET(section, name) @@ -390,7 +412,7 @@ static PyGetSetDef pepy_section_getseters[] = { {NULL}}; static PyTypeObject pepy_section_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.section", /* tp_name */ sizeof(pepy_section), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -469,7 +491,7 @@ static void pepy_resource_dealloc(pepy_resource *self) { Py_XDECREF(self->RVA); Py_XDECREF(self->size); Py_XDECREF(self->data); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } PEPY_OBJECT_GET(resource, type_str) @@ -594,7 +616,7 @@ static PyGetSetDef pepy_resource_getseters[] = { {NULL}}; static PyTypeObject pepy_resource_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.resource", /* tp_name */ sizeof(pepy_resource), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -662,7 +684,7 @@ static int pepy_parsed_init(pepy_parsed *self, PyObject *args, PyObject *kwds) { static void pepy_parsed_dealloc(pepy_parsed *self) { DestructParsedPE(self->pe); - self->ob_type->tp_free((PyObject *) self); + Py_TYPE(self)->tp_free((PyObject *) self); } static PyObject *pepy_parsed_get_entry_point(PyObject *self, PyObject *args) { @@ -682,57 +704,38 @@ static PyObject *pepy_parsed_get_entry_point(PyObject *self, PyObject *args) { } static PyObject *pepy_parsed_get_bytes(PyObject *self, PyObject *args) { - uint64_t start, idx; - uint8_t b; - Py_ssize_t len; - PyObject *byte, *tmp, *ret, *newlist; + uint64_t start; + Py_ssize_t len, idx; + PyObject *ret; if (!PyArg_ParseTuple(args, "KK:pepy_parsed_get_bytes", &start, &len)) return NULL; /* - * XXX: I don't think this is the best way to do this. I want a - * ByteArray object to be returned so first put each byte in a - * list and then call PyByteArray_FromObject to get the byte array. + * XXX: a new implementation read all bytes in char* and use + * PybyteArray_FromStringAndSize */ - tmp = PyList_New(len); - if (!tmp) { - PyErr_SetString(pepy_error, "Unable to create initial list."); + + uint8_t *buf = new(std::nothrow) uint8_t[len]; + if (!buf) { + /* in case allocation failed */ + PyErr_SetString(pepy_error, "Unable to create initial buffer (allocation failure)."); return NULL; } - for (idx = 0; idx < len; idx++) { - if (!ReadByteAtVA(((pepy_parsed *) self)->pe, start + idx, b)) + for (idx = 0; idx < len; idx++) { + if (!ReadByteAtVA(((pepy_parsed *) self)->pe, start + idx, buf[idx])) break; - - byte = PyInt_FromLong(b); - if (!byte) { - Py_DECREF(tmp); - PyErr_SetString(pepy_error, "Unable to create integer object."); - return NULL; - } - PyList_SET_ITEM(tmp, idx, byte); - Py_DECREF(byte); } - /* Didn't get all of it for some reason, so give back what we have. */ - if (idx < len) { - newlist = PyList_GetSlice(tmp, 0, idx); - if (!newlist) { - PyErr_SetString(pepy_error, "Unable to create new list."); - return NULL; - } - Py_DECREF(tmp); - tmp = newlist; - } - - ret = PyByteArray_FromObject(tmp); + /* use idx as content length, if we get less than asked for */ + ret = PyByteArray_FromStringAndSize(reinterpret_cast(buf), idx); if (!ret) { - PyErr_SetString(pepy_error, "Unable to create new list."); + PyErr_SetString(pepy_error, "Unable to create new byte array."); return NULL; } - Py_DECREF(tmp); + delete[] buf; return ret; } @@ -1200,7 +1203,7 @@ static PyMethodDef pepy_parsed_methods[] = { {NULL}}; static PyTypeObject pepy_parsed_type = { - PyObject_HEAD_INIT(NULL) 0, /* ob_size */ + PyVarObject_HEAD_INIT(NULL,0) /* ob_size */ "pepy.parsed", /* tp_name */ sizeof(pepy_parsed), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -1276,7 +1279,8 @@ static PyObject *pepy_parse(PyObject *self, PyObject *args) { static PyMethodDef pepy_methods[] = { {"parse", pepy_parse, METH_VARARGS, "Parse PE from file."}, {NULL}}; -PyMODINIT_FUNC initpepy(void) { +static +PyObject* pepi_module_init(void) { PyObject *m; if (PyType_Ready(&pepy_parsed_type) < 0 || @@ -1285,11 +1289,29 @@ PyMODINIT_FUNC initpepy(void) { PyType_Ready(&pepy_export_type) < 0 || PyType_Ready(&pepy_relocation_type) < 0 || PyType_Ready(&pepy_resource_type) < 0) - return; + return NULL; +#if PY_MAJOR_VERSION >= 3 + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "pepy", + "Python interface to pe-parse.", + -1, + pepy_methods, + NULL, + NULL, + NULL, + NULL, + }; +#endif + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else m = Py_InitModule3("pepy", pepy_methods, "Python interface to pe-parse."); +#endif if (!m) - return; + return NULL; pepy_error = PyErr_NewException((char *) "pepy.error", NULL, NULL); Py_INCREF(pepy_error); @@ -1374,4 +1396,18 @@ PyMODINIT_FUNC initpepy(void) { PyModule_AddIntMacro(m, IMAGE_SCN_MEM_EXECUTE); PyModule_AddIntMacro(m, IMAGE_SCN_MEM_READ); PyModule_AddIntMacro(m, IMAGE_SCN_MEM_WRITE); + + return m; } + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC PyInit_pepy(void) +{ + return pepi_module_init(); +} +#else +PyMODINIT_FUNC initpepy(void) +{ + pepi_module_init(); +} +#endif diff --git a/python/test_python3.py b/python/test_python3.py new file mode 100755 index 0000000..f50ee89 --- /dev/null +++ b/python/test_python3.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +import sys +import time +import pepy +import binascii + +from hashlib import md5 + +try: + p = pepy.parse(sys.argv[1]) +except pepy.error as e: + print(e) + sys.exit(1) + +print("Magic: %s" % hex(p.magic)) +print("Signature: %s" % hex(p.signature)) +print("Machine: %s" % hex(p.machine)) +print("Number of sections: %s" % p.numberofsections) +print("Number of symbols: %s" % p.numberofsymbols) +print("Characteristics: %s" % hex(p.characteristics)) +print("Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))) +print("Major linker version: %s" % hex(p.majorlinkerver)) +print("Minor linker version: %s" % hex(p.minorlinkerver)) +print("Size of code: %s" % hex(p.codesize)) +print("Size of initialized data: %s" % hex(p.initdatasize)) +print("Size of uninitialized data: %s" % hex(p.uninitdatasize)) +print("Address of entry point: %s" % hex(p.entrypointaddr)) +print("Base address of code: %s" % hex(p.baseofcode)) +try: + print("Base address of data: %s" % hex(p.baseofdata)) +except: + # Not available on PE32+, ignore it. + pass +print("Image base address: %s" % hex(p.imagebase)) +print("Section alignment: %s" % hex(p.sectionalignement)) +print("File alignment: %s" % hex(p.filealingment)) +print("Major OS version: %s" % hex(p.majorosver)) +print("Minor OS version: %s" % hex(p.minorosver)) +print("Win32 version: %s" % hex(p.win32ver)) +print("Size of image: %s" % hex(p.imagesize)) +print("Size of headers: %s" % hex(p.headersize)) +print("Checksum: %s" % hex(p.checksum)) +print("Subsystem: %s" % hex(p.subsystem)) +print("DLL characteristics: %s" % hex(p.dllcharacteristics)) +print("Size of stack reserve: %s" % hex(p.stackreservesize)) +print("Size of stack commit: %s" % hex(p.stackcommitsize)) +print("Size of heap reserve: %s" % hex(p.heapreservesize)) +print("Size of heap commit: %s" % hex(p.heapcommitsize)) +print("Loader flags: %s" % hex(p.loaderflags)) +print("Number of RVA and sizes: %s" % hex(p.rvasandsize)) +ep = p.get_entry_point() +byts = p.get_bytes(ep, 8) +print("Bytes at %s: %s" % (hex(ep), ' '.join(['%#2x' % b for b in byts]))) +sections = p.get_sections() +print("Sections: (%i)" % len(sections)) +for sect in sections: + print("[+] %s" % sect.name) + print("\tBase: %s" % hex(sect.base)) + print("\tLength: %s" % sect.length) + print("\tVirtual address: %s" % hex(sect.virtaddr)) + print("\tVirtual size: %i" % sect.virtsize) + print("\tNumber of Relocations: %i" % sect.numrelocs) + print("\tNumber of Line Numbers: %i" % sect.numlinenums) + print("\tCharacteristics: %s" % hex(sect.characteristics)) + if sect.length: + print("\tFirst 10 bytes: 0x%s" % binascii.hexlify(sect.data[:10])) + print("\tMD5: %s" % md5(sect.data).hexdigest()) +imports = p.get_imports() +print("Imports: (%i)" % len(imports)) +for imp in imports: + print("[+] Symbol: %s (%s %s)" % (imp.sym, imp.name, hex(imp.addr))) +exports = p.get_exports() +print("Exports: (%i)" % len(exports)) +for exp in exports: + print("[+] Module: %s (%s %s)" % (exp.mod, exp.func, hex(exp.addr))) +relocations = p.get_relocations() +print("Relocations: (%i)" % len(relocations)) +for reloc in relocations: + print("[+] Type: %s (%s)" % (reloc.type, hex(reloc.addr))) +resources = p.get_resources() +print("Resources: (%i)" % len(resources)) +for resource in resources: + print("[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())) + if resource.type_str: + print("\tType string: %s" % resource.type_str) + else: + print("\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())) + if resource.name_str: + print("\tName string: %s" % resource.name_str) + else: + print("\tName: %s" % hex(resource.name)) + if resource.lang_str: + print("\tLang string: %s" % resource.lang_str) + else: + print("\tLang: %s" % hex(resource.lang)) + print("\tCodepage: %s" % hex(resource.codepage)) + print("\tRVA: %s" % hex(resource.RVA)) + print("\tSize: %s" % hex(resource.size))