From ec5c49eaff1c54ff779bf36ab1212a749d05e185 Mon Sep 17 00:00:00 2001 From: Wesley Shields <wxs@atarininja.org> Date: Mon, 30 Dec 2013 16:45:50 -0500 Subject: [PATCH] Make resource parsing more resilient. I have a UPX-packed sample whose resource directory is corrupted. These changes allow the resources to be properly parsed. They add an RVA and size to the resource struct. These are the address and size of the resource as declared in the directory. If the address is invalid, create a zero-length buffer for the data. If the size is invalid (i.e., it goes off the end of the .rsrc section), create a zero-length buffer for the data. Otherwise, return the actual data. This allows consumers of the rsrc to figure out whether the resource is corrupt by comparing the length of the buffer to the size element. If the size is greater than 0 but the buffer is empty, then the resource is invalid. Also, it should never happen, but just to be safe, make pepy catch NULL buffers (in pepy_data_converter) and return an empty bytearray. 
--- dump-prog/dump.cpp | 2 ++ parser-library/parse.cpp | 19 +++++++++++++++++-- parser-library/parse.h | 2 ++ python/pepy.cpp | 30 ++++++++++++++++++++++++++---- python/test.py | 2 ++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/dump-prog/dump.cpp b/dump-prog/dump.cpp index fe170f0..953ea30 100644 --- a/dump-prog/dump.cpp +++ b/dump-prog/dump.cpp @@ -105,6 +105,8 @@ int printRsrc(void *N, else cout << "Lang: " << to_string<uint32_t>(r.lang, hex) << endl; cout << "Codepage: " << to_string<uint32_t>(r.codepage, hex) << endl; + cout << "RVA: " << to_string<uint32_t>(r.RVA, dec) << endl; + cout << "Size: " << to_string<uint32_t>(r.size, dec) << endl; return 0; } diff --git a/parser-library/parse.cpp b/parser-library/parse.cpp index 51d6d37..2b0dd43 100644 --- a/parser-library/parse.cpp +++ b/parser-library/parse.cpp @@ -219,14 +219,29 @@ bool parse_resource_table(bounded_buffer *sectionData, ::uint32_t o, ::uint32_t rsrc.name = rde->name; rsrc.lang = rde->lang; rsrc.codepage = rdat.codepage; + rsrc.RVA = rdat.RVA; + rsrc.size = rdat.size; // The start address is (RVA - section virtual address). uint32_t start = rdat.RVA - virtaddr; + /* + * Some binaries (particularly packed) will have invalid addresses here. + * If those happen, return a zero length buffer. + * If the start is valid, try to get the data and if that fails return + * a zero length buffer. + */ if (start > rdat.RVA) - return false; - rsrc.buf = splitBuffer(sectionData, start, start + rdat.size); + rsrc.buf = splitBuffer(sectionData, 0, 0); + else { + rsrc.buf = splitBuffer(sectionData, start, start + rdat.size); + if (!rsrc.buf) + rsrc.buf = splitBuffer(sectionData, 0, 0); + } + + /* If we can't get even a zero length buffer, something is very wrong. 
*/ if (!rsrc.buf) return false; + rsrcs.push_back(rsrc); } } diff --git a/parser-library/parse.h b/parser-library/parse.h index 8ae7df2..8bc5bf5 100644 --- a/parser-library/parse.h +++ b/parser-library/parse.h @@ -49,6 +49,8 @@ struct resource { boost::uint32_t name; boost::uint32_t lang; boost::uint32_t codepage; + boost::uint32_t RVA; + boost::uint32_t size; bounded_buffer *buf; }; diff --git a/python/pepy.cpp b/python/pepy.cpp index 8622c80..9304530 100644 --- a/python/pepy.cpp +++ b/python/pepy.cpp @@ -76,6 +76,8 @@ typedef struct { PyObject *name; PyObject *lang; PyObject *codepage; + PyObject *RVA; + PyObject *size; PyObject *data; } pepy_resource; @@ -425,7 +427,7 @@ static PyObject *pepy_resource_new(PyTypeObject *type, PyObject *args, PyObject } static int pepy_resource_init(pepy_resource *self, PyObject *args, PyObject *kwds) { - if (!PyArg_ParseTuple(args, "OOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->data)) + if (!PyArg_ParseTuple(args, "OOOOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->RVA, &self->size, &self->data)) return -1; return 0; @@ -439,6 +441,8 @@ static void pepy_resource_dealloc(pepy_resource *self) { Py_XDECREF(self->name); Py_XDECREF(self->lang); Py_XDECREF(self->codepage); + Py_XDECREF(self->RVA); + Py_XDECREF(self->size); Py_XDECREF(self->data); self->ob_type->tp_free((PyObject *) self); } @@ -450,6 +454,8 @@ PEPY_OBJECT_GET(resource, type) PEPY_OBJECT_GET(resource, name) PEPY_OBJECT_GET(resource, lang) PEPY_OBJECT_GET(resource, codepage) +PEPY_OBJECT_GET(resource, RVA) +PEPY_OBJECT_GET(resource, size) PEPY_OBJECT_GET(resource, data) static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) { @@ -556,6 +562,8 @@ static PyGetSetDef pepy_resource_getseters[] = { OBJECTGETTER(resource, name, "Name"), OBJECTGETTER(resource, lang, 
"Language"), OBJECTGETTER(resource, codepage, "Codepage"), + OBJECTGETTER(resource, RVA, "RVA"), + OBJECTGETTER(resource, size, "Size (specified in RDAT)"), OBJECTGETTER(resource, data, "Resource data"), { NULL } }; @@ -703,11 +711,25 @@ static PyObject *pepy_parsed_get_bytes(PyObject *self, PyObject *args) { return ret; } -/* This is used to convert bounded buffers into python byte array objects. */ +/* + * This is used to convert bounded buffers into python byte array objects. + * In case the buffer is NULL, return an empty bytearray. + */ static PyObject *pepy_data_converter(bounded_buffer *data) { PyObject* ret; + const char *str; + Py_ssize_t len; - ret = PyByteArray_FromStringAndSize((const char *) data->buf, data->bufLen); + if (!data || !data->buf) { + str = ""; + len = 0; + } + else { + str = (const char *) data->buf; + len = data->bufLen; + } + + ret = PyByteArray_FromStringAndSize(str, len); if (!ret) { PyErr_SetString(pepy_error, "Unable to convert data to byte array."); return NULL; @@ -773,7 +795,7 @@ int resource_callback(void *cbd, resource r) { * The tuple item order is important here. It is passed into the * section type initialization and parsed there. 
*/ - tuple = Py_BuildValue("s#s#s#IIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, pepy_data_converter, r.buf); + tuple = Py_BuildValue("s#s#s#IIIIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, r.RVA, r.size, pepy_data_converter, r.buf); if (!tuple) return 1; diff --git a/python/test.py b/python/test.py index eec9d66..f31665c 100755 --- a/python/test.py +++ b/python/test.py @@ -86,3 +86,5 @@ for resource in resources: else: print "\tLang: %s" % hex(resource.lang) print "\tCodepage: %s" % hex(resource.codepage) + print "\tRVA: %s" % hex(resource.RVA) + print "\tSize: %s" % hex(resource.size)