Make resource parsing more resilient.

I have a UPX-packed sample whose resource directory is corrupted. These changes
allow its resources to be parsed properly.

They add an RVA and a size to the resource struct: the address and size
of the resource as declared in the directory. If the address is invalid, or
if the size is invalid (i.e., it runs off the end of the .rsrc section), a
zero-length buffer is created for the data. Otherwise, the actual data is
returned.

This allows consumers of the resource to figure out whether it is corrupt
by comparing the length of the buffer to the size element: if the size is
greater than 0 but the buffer is empty, the resource is invalid.

Also, although it should never happen, to be safe make pepy catch NULL
buffers (in pepy_data_converter) and return an empty bytearray.
This commit is contained in:
Wesley Shields 2013-12-30 16:45:50 -05:00
parent b9d1592c50
commit ec5c49eaff
5 changed files with 49 additions and 6 deletions

View File

@ -105,6 +105,8 @@ int printRsrc(void *N,
else
cout << "Lang: " << to_string<uint32_t>(r.lang, hex) << endl;
cout << "Codepage: " << to_string<uint32_t>(r.codepage, hex) << endl;
cout << "RVA: " << to_string<uint32_t>(r.RVA, dec) << endl;
cout << "Size: " << to_string<uint32_t>(r.size, dec) << endl;
return 0;
}

View File

@ -219,14 +219,29 @@ bool parse_resource_table(bounded_buffer *sectionData, ::uint32_t o, ::uint32_t
rsrc.name = rde->name;
rsrc.lang = rde->lang;
rsrc.codepage = rdat.codepage;
rsrc.RVA = rdat.RVA;
rsrc.size = rdat.size;
// The start address is (RVA - section virtual address).
uint32_t start = rdat.RVA - virtaddr;
/*
* Some binaries (particularly packed) will have invalid addresses here.
* If those happen, return a zero length buffer.
* If the start is valid, try to get the data and if that fails return
* a zero length buffer.
*/
if (start > rdat.RVA)
return false;
rsrc.buf = splitBuffer(sectionData, start, start + rdat.size);
rsrc.buf = splitBuffer(sectionData, 0, 0);
else {
rsrc.buf = splitBuffer(sectionData, start, start + rdat.size);
if (!rsrc.buf)
rsrc.buf = splitBuffer(sectionData, 0, 0);
}
/* If we can't get even a zero length buffer, something is very wrong. */
if (!rsrc.buf)
return false;
rsrcs.push_back(rsrc);
}
}

View File

@ -49,6 +49,8 @@ struct resource {
boost::uint32_t name;
boost::uint32_t lang;
boost::uint32_t codepage;
boost::uint32_t RVA;
boost::uint32_t size;
bounded_buffer *buf;
};

View File

@ -76,6 +76,8 @@ typedef struct {
PyObject *name;
PyObject *lang;
PyObject *codepage;
PyObject *RVA;
PyObject *size;
PyObject *data;
} pepy_resource;
@ -425,7 +427,7 @@ static PyObject *pepy_resource_new(PyTypeObject *type, PyObject *args, PyObject
}
static int pepy_resource_init(pepy_resource *self, PyObject *args, PyObject *kwds) {
if (!PyArg_ParseTuple(args, "OOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->data))
if (!PyArg_ParseTuple(args, "OOOOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->RVA, &self->size, &self->data))
return -1;
return 0;
@ -439,6 +441,8 @@ static void pepy_resource_dealloc(pepy_resource *self) {
Py_XDECREF(self->name);
Py_XDECREF(self->lang);
Py_XDECREF(self->codepage);
Py_XDECREF(self->RVA);
Py_XDECREF(self->size);
Py_XDECREF(self->data);
self->ob_type->tp_free((PyObject *) self);
}
@ -450,6 +454,8 @@ PEPY_OBJECT_GET(resource, type)
PEPY_OBJECT_GET(resource, name)
PEPY_OBJECT_GET(resource, lang)
PEPY_OBJECT_GET(resource, codepage)
PEPY_OBJECT_GET(resource, RVA)
PEPY_OBJECT_GET(resource, size)
PEPY_OBJECT_GET(resource, data)
static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
@ -556,6 +562,8 @@ static PyGetSetDef pepy_resource_getseters[] = {
OBJECTGETTER(resource, name, "Name"),
OBJECTGETTER(resource, lang, "Language"),
OBJECTGETTER(resource, codepage, "Codepage"),
OBJECTGETTER(resource, RVA, "RVA"),
OBJECTGETTER(resource, size, "Size (specified in RDAT)"),
OBJECTGETTER(resource, data, "Resource data"),
{ NULL }
};
@ -703,11 +711,25 @@ static PyObject *pepy_parsed_get_bytes(PyObject *self, PyObject *args) {
return ret;
}
/* This is used to convert bounded buffers into python byte array objects. */
/*
* This is used to convert bounded buffers into python byte array objects.
* In case the buffer is NULL, return an empty bytearray.
*/
static PyObject *pepy_data_converter(bounded_buffer *data) {
PyObject* ret;
const char *str;
Py_ssize_t len;
ret = PyByteArray_FromStringAndSize((const char *) data->buf, data->bufLen);
if (!data || !data->buf) {
str = "";
len = 0;
}
else {
str = (const char *) data->buf;
len = data->bufLen;
}
ret = PyByteArray_FromStringAndSize(str, len);
if (!ret) {
PyErr_SetString(pepy_error, "Unable to convert data to byte array.");
return NULL;
@ -773,7 +795,7 @@ int resource_callback(void *cbd, resource r) {
* The tuple item order is important here. It is passed into the
* section type initialization and parsed there.
*/
tuple = Py_BuildValue("s#s#s#IIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, pepy_data_converter, r.buf);
tuple = Py_BuildValue("s#s#s#IIIIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, r.RVA, r.size, pepy_data_converter, r.buf);
if (!tuple)
return 1;

View File

@ -86,3 +86,5 @@ for resource in resources:
else:
print "\tLang: %s" % hex(resource.lang)
print "\tCodepage: %s" % hex(resource.codepage)
print "\tRVA: %s" % hex(resource.RVA)
print "\tSize: %s" % hex(resource.size)