Make resource parsing more resilient.

I have a UPX-packed sample whose resource directory is corrupted. These changes allow its resources to be parsed properly. They add an RVA and a size to the resource struct: the address and size of the resource as declared in the directory. If the address is invalid, a zero-length buffer is created for the data. If the size is invalid (i.e., it runs off the end of the .rsrc section), a zero-length buffer is created for the data. Otherwise, the actual data is returned. This lets consumers of the resource determine whether it is corrupt by comparing the length of the buffer to the size element: if the size is greater than 0 but the buffer is empty, the resource is invalid. Also, although it should never happen, to be safe pepy now catches NULL buffers (in pepy_data_converter) and returns an empty bytearray.
2025-04-26 20:34:31 +00:00 · 2013-12-30 16:45:50 -05:00 · 2013-12-30 16:45:50 -05:00 · ec5c49eaff
commit ec5c49eaff
parent b9d1592c50
5 changed files with 49 additions and 6 deletions
--- a/dump-prog/dump.cpp
+++ b/dump-prog/dump.cpp
@ -105,6 +105,8 @@ int printRsrc(void     *N,
  else
    cout << "Lang: " << to_string<uint32_t>(r.lang, hex) << endl;
  cout << "Codepage: " << to_string<uint32_t>(r.codepage, hex) << endl;
+  cout << "RVA: " << to_string<uint32_t>(r.RVA, dec) << endl;
+  cout << "Size: " << to_string<uint32_t>(r.size, dec) << endl;
  return 0;
 }

--- a/parser-library/parse.cpp
+++ b/parser-library/parse.cpp
@ -219,14 +219,29 @@ bool parse_resource_table(bounded_buffer *sectionData, ::uint32_t o, ::uint32_t
      rsrc.name = rde->name;
      rsrc.lang = rde->lang;
      rsrc.codepage = rdat.codepage;
+      rsrc.RVA = rdat.RVA;
+      rsrc.size = rdat.size;

      // The start address is (RVA - section virtual address).
      uint32_t start = rdat.RVA - virtaddr;
+      /*
+       * Some binaries (particularly packed) will have invalid addresses here.
+       * If those happen, return a zero length buffer.
+       * If the start is valid, try to get the data and if that fails return
+       * a zero length buffer.
+       */
      if (start > rdat.RVA)
-        return false;
-      rsrc.buf = splitBuffer(sectionData, start, start + rdat.size);
+        rsrc.buf = splitBuffer(sectionData, 0, 0);
+      else {
+        rsrc.buf = splitBuffer(sectionData, start, start + rdat.size);
+        if (!rsrc.buf)
+          rsrc.buf = splitBuffer(sectionData, 0, 0);
+      }
+
+      /* If we can't get even a zero length buffer, something is very wrong. */
      if (!rsrc.buf)
        return false;
+
      rsrcs.push_back(rsrc);
    }
  }
--- a/parser-library/parse.h
+++ b/parser-library/parse.h
@ -49,6 +49,8 @@ struct resource {
  boost::uint32_t name;
  boost::uint32_t lang;
  boost::uint32_t codepage;
+  boost::uint32_t RVA;
+  boost::uint32_t size;
  bounded_buffer  *buf;
 };

--- a/python/pepy.cpp
+++ b/python/pepy.cpp
@ -76,6 +76,8 @@ typedef struct {
 	PyObject *name;
 	PyObject *lang;
 	PyObject *codepage;
+	PyObject *RVA;
+	PyObject *size;
 	PyObject *data;
 } pepy_resource;

@ -425,7 +427,7 @@ static PyObject *pepy_resource_new(PyTypeObject *type, PyObject *args, PyObject
 }

 static int pepy_resource_init(pepy_resource *self, PyObject *args, PyObject *kwds) {
-	if (!PyArg_ParseTuple(args, "OOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->data))
+	if (!PyArg_ParseTuple(args, "OOOOOOOOOO:pepy_resource_init", &self->type_str, &self->name_str, &self->lang_str, &self->type, &self->name, &self->lang, &self->codepage, &self->RVA, &self->size, &self->data))
 		return -1;

 	return 0;
@ -439,6 +441,8 @@ static void pepy_resource_dealloc(pepy_resource *self) {
 	Py_XDECREF(self->name);
 	Py_XDECREF(self->lang);
 	Py_XDECREF(self->codepage);
+	Py_XDECREF(self->RVA);
+	Py_XDECREF(self->size);
 	Py_XDECREF(self->data);
 	self->ob_type->tp_free((PyObject *) self);
 }
@ -450,6 +454,8 @@ PEPY_OBJECT_GET(resource, type)
 PEPY_OBJECT_GET(resource, name)
 PEPY_OBJECT_GET(resource, lang)
 PEPY_OBJECT_GET(resource, codepage)
+PEPY_OBJECT_GET(resource, RVA)
+PEPY_OBJECT_GET(resource, size)
 PEPY_OBJECT_GET(resource, data)

 static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
@ -556,6 +562,8 @@ static PyGetSetDef pepy_resource_getseters[] = {
 	OBJECTGETTER(resource, name, "Name"),
 	OBJECTGETTER(resource, lang, "Language"),
 	OBJECTGETTER(resource, codepage, "Codepage"),
+	OBJECTGETTER(resource, RVA, "RVA"),
+	OBJECTGETTER(resource, size, "Size (specified in RDAT)"),
 	OBJECTGETTER(resource, data, "Resource data"),
 	{ NULL }
 };
@ -703,11 +711,25 @@ static PyObject *pepy_parsed_get_bytes(PyObject *self, PyObject *args) {
 	return ret;
 }

-/* This is used to convert bounded buffers into python byte array objects. */
+/*
+ * This is used to convert bounded buffers into python byte array objects.
+ * In case the buffer is NULL, return an empty bytearray.
+ */
 static PyObject *pepy_data_converter(bounded_buffer *data) {
 	PyObject* ret;
+	const char *str;
+	Py_ssize_t len;

-	ret = PyByteArray_FromStringAndSize((const char *) data->buf, data->bufLen);
+	if (!data || !data->buf) {
+		str = "";
+		len = 0;
+	}
+	else {
+		str = (const char *) data->buf;
+		len = data->bufLen;
+	}
+
+	ret = PyByteArray_FromStringAndSize(str, len);
 	if (!ret) {
 		PyErr_SetString(pepy_error, "Unable to convert data to byte array.");
 		return NULL;
@ -773,7 +795,7 @@ int resource_callback(void *cbd, resource r) {
 	 * The tuple item order is important here. It is passed into the
 	 * section type initialization and parsed there.
 	 */
-	tuple = Py_BuildValue("s#s#s#IIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, pepy_data_converter, r.buf);
+	tuple = Py_BuildValue("s#s#s#IIIIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, r.RVA, r.size, pepy_data_converter, r.buf);
 	if (!tuple)
 		return 1;

--- a/python/test.py
+++ b/python/test.py
@ -86,3 +86,5 @@ for resource in resources:
    else:
        print "\tLang: %s" % hex(resource.lang)
    print "\tCodepage: %s" % hex(resource.codepage)
+    print "\tRVA: %s" % hex(resource.RVA)
+    print "\tSize: %s" % hex(resource.size)