Implement resource parsing.

While here, fix a memory leak in pepy as I was not decrementing the
reference counter on self->data in section_dealloc().
This commit is contained in:
Wesley Shields 2013-12-24 12:41:59 -05:00
parent b139ae3b39
commit a6af4cbd18
8 changed files with 578 additions and 8 deletions

View File

@ -8,6 +8,7 @@ pe-parse supports these use cases via a minimal API that provides methods for
* Iterating over the relocations
* Iterating over the exported functions
* Iterating over sections
* Iterating over resources
* Reading bytes from specified virtual addresses
* Retrieving the program entry point

View File

@ -89,6 +89,25 @@ int printRelocs(void *N, VA relocAddr, reloc_type type) {
return 0 ;
}
// IterRsrc callback that dumps one resource to stdout. For each of type,
// name and lang the string form is printed when one is present; otherwise
// the numeric value is printed through to_string<uint32_t>(..., hex).
// N is the opaque callback argument and is unused. Always returns 0 so the
// iteration continues.
int printRsrc(void *N,
              resource r)
{
  if (r.type_str.empty()) {
    cout << "Type: " << to_string<uint32_t>(r.type, hex) << endl;
  } else {
    cout << "Type (string): " << r.type_str << endl;
  }

  if (r.name_str.empty()) {
    cout << "Name: " << to_string<uint32_t>(r.name, hex) << endl;
  } else {
    cout << "Name (string): " << r.name_str << endl;
  }

  if (r.lang_str.empty()) {
    cout << "Lang: " << to_string<uint32_t>(r.lang, hex) << endl;
  } else {
    cout << "Lang (string): " << r.lang_str << endl;
  }

  // The codepage has no string form.
  cout << "Codepage: " << to_string<uint32_t>(r.codepage, hex) << endl;

  return 0;
}
int printSecs(void *N,
VA secBase,
string &secName,
@ -177,6 +196,7 @@ int main(int argc, char *argv[]) {
cout << endl;
}
IterRsrc(p, printRsrc, NULL);
DestructParsedPE(p);
}
}

View File

@ -168,6 +168,27 @@ struct nt_header_32 {
optional_header_32 OptionalHeader;
};
// Header of one resource directory table. parse_resource() reads it field by
// field using _offset() on this struct and then skips sizeof() bytes to reach
// the directory entries, so field order and widths must not change.
struct resource_dir_table {
boost::uint32_t Characteristics;
boost::uint32_t TimeDateStamp;
boost::uint16_t MajorVersion;
boost::uint16_t MinorVersion;
// parse_resource() iterates over NameEntries + IDEntries directory entries
// following this header.
boost::uint16_t NameEntries;
boost::uint16_t IDEntries;
};
// One entry following a resource_dir_table. Read via _offset()/sizeof() in
// parse_resource(), so the layout must not change.
struct resource_dir_entry {
// Identifier of the entry. When the high bit is set, parse_resource_id()
// treats the remaining bits as the offset of a length-prefixed string.
boost::uint32_t ID;
// When the high bit is set, parse_resource() recurses into another directory
// table at the offset in the remaining bits; otherwise the value is used as
// the offset of a resource_dat_entry.
boost::uint32_t RVA;
};
// Leaf record describing the data of one resource. Read via _offset() in
// parse_resource(), so the layout must not change.
struct resource_dat_entry {
// parse_resource() subtracts the section's virtual address from this value to
// find where the data starts inside the section buffer.
boost::uint32_t RVA;
// Number of bytes of resource data starting at that position.
boost::uint32_t size;
// Copied verbatim into resource::codepage.
boost::uint32_t codepage;
// Read by parse_resource() but not otherwise used.
boost::uint32_t reserved;
};
struct image_section_header {
boost::uint8_t Name[NT_SHORT_NAME_LEN];
union {

View File

@ -56,6 +56,7 @@ struct reloc {
struct parsed_pe_internal {
list<section> secs;
list<resource> rsrcs;
list<importent> imports;
list<reloc> relocs;
list<exportent> exports;
@ -80,6 +81,165 @@ bool getSecForVA(list<section> &secs, VA v, section &sec) {
return false;
}
// Call cb once for every resource stored in pe, in list order. cbd is passed
// through to cb untouched as its first argument. Each call receives its own
// copy of the resource. Iteration stops as soon as cb returns non-zero.
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
  list<resource> &all = pe->internal->rsrcs;
  list<resource>::iterator cur = all.begin();
  const list<resource>::iterator last = all.end();

  while (cur != last) {
    resource entry = *cur;
    if (cb(cbd, entry) != 0) {
      return;
    }
    ++cur;
  }
}
// Decode the string form of a resource directory entry ID, if it has one.
//
//   data   - buffer holding the resource section
//   id     - raw ID field of a resource_dir_entry
//   result - receives the string bytes (appended, not replaced)
//
// When the high bit of id is clear the ID is purely numeric: result is left
// untouched and true is returned. When it is set, the lower 31 bits are the
// offset in data of a string stored as a 16-bit length followed by len*2
// bytes (presumably 16-bit characters). The raw bytes are appended to result
// without any conversion. Returns false if any read falls outside data.
bool parse_resource_id(bounded_buffer *data, ::uint32_t id, string &result) {
  if ((id & 0x80000000) == 0)
    return true;

  // Only the top bit is a flag; all 31 remaining bits belong to the offset.
  ::uint32_t start = id & 0x7FFFFFFF;

  ::uint16_t len;
  if (readWord(data, start, len) == false)
    return false;
  start += 2;

  const ::uint32_t byteCount = static_cast< ::uint32_t>(len) * 2;
  for (::uint32_t i = 0; i < byteCount; i++) {
    ::uint8_t c;
    if (readByte(data, start + i, c) == false)
      return false;
    result.push_back((char) c);
  }

  return true;
}
// Recursively walk one resource directory table and collect its leaves.
//
//   sectionData - buffer holding the whole resource section
//   o           - offset within sectionData of the directory table to parse
//   virtaddr    - virtual address of the section; subtracted from each data
//                 entry's RVA to locate the data inside sectionData
//   res         - state inherited from the enclosing directory levels, or
//                 NULL for the root table. It is only read, never modified.
//   rsrcs       - every data entry found is appended here as a resource
//
// The level of a table is taken from the inherited depth: IDs found at depth
// 0, 1 and 2 are stored as type, name and lang respectively; IDs at deeper
// levels are not recorded. Returns false on any out-of-bounds read, on a
// data entry that does not fit the section, or when nesting exceeds the
// safety bound below. An empty table is not an error.
bool parse_resource(bounded_buffer *sectionData, ::uint32_t o, ::uint32_t virtaddr, resource *res, list<resource> &rsrcs) {
  // Arbitrary upper bound on nesting so that a directory that points back at
  // itself cannot recurse until the stack is exhausted.
  static const ::uint32_t kMaxResourceDepth = 32;

  if (!sectionData)
    return false;

  // Snapshot of what the enclosing levels established. Every entry of this
  // table starts from its own copy, so siblings never see each other's
  // depth, IDs or strings.
  resource parent;
  if (res) {
    parent = *res;
  } else {
    parent.depth = 0;
    parent.type = 0;
    parent.name = 0;
    parent.lang = 0;
    parent.codepage = 0;
    parent.buf = NULL;
  }

  if (parent.depth >= kMaxResourceDepth)
    return false;

  resource_dir_table rdt;

#define READ_WORD(x) \
  if(readWord(sectionData, o+_offset(resource_dir_table, x), rdt.x) == false) { \
    return false; \
  }
#define READ_DWORD(x) \
  if(readDword(sectionData, o+_offset(resource_dir_table, x), rdt.x) == false) { \
    return false; \
  }

  READ_DWORD(Characteristics);
  READ_DWORD(TimeDateStamp);
  READ_WORD(MajorVersion);
  READ_WORD(MinorVersion);
  READ_WORD(NameEntries);
  READ_WORD(IDEntries);

#undef READ_WORD
#undef READ_DWORD

  o += sizeof(resource_dir_table);

  if (!rdt.NameEntries && !rdt.IDEntries)
    return true; // This is not a hard error. It does happen.

  const ::uint32_t entryCount =
      static_cast< ::uint32_t>(rdt.NameEntries) + rdt.IDEntries;

  for (::uint32_t i = 0; i < entryCount; i++) {
    resource_dir_entry rde;

#define READ_DWORD(x) \
    if(readDword(sectionData, o+_offset(resource_dir_entry, x), rde.x) == false) { \
      return false; \
    }

    READ_DWORD(ID);
    READ_DWORD(RVA);

#undef READ_DWORD

    // o is the cursor over this table's entries and must only ever advance
    // by one entry; data entries are read through a separate offset below.
    o += sizeof(resource_dir_entry);

    resource rsrc = parent;

    if (rsrc.depth == 0) {
      rsrc.type = rde.ID;
      if (parse_resource_id(sectionData, rde.ID, rsrc.type_str) == false)
        return false;
    } else if (rsrc.depth == 1) {
      rsrc.name = rde.ID;
      if (parse_resource_id(sectionData, rde.ID, rsrc.name_str) == false)
        return false;
    } else if (rsrc.depth == 2) {
      rsrc.lang = rde.ID;
      if (parse_resource_id(sectionData, rde.ID, rsrc.lang_str) == false)
        return false;
    }

    rsrc.depth++;

    // High bit 1 = lower 31 bits are the offset of another directory table.
    // High bit 0 = the value is the offset of a resource data entry.
    if (rde.RVA & 0x80000000) {
      if (parse_resource(sectionData, rde.RVA & 0x7FFFFFFF, virtaddr, &rsrc, rsrcs) == false)
        return false;
    } else {
      resource_dat_entry rdat;
      const ::uint32_t datOffset = rde.RVA;

#define READ_DWORD(x) \
      if(readDword(sectionData, datOffset+_offset(resource_dat_entry, x), rdat.x) == false) { \
        return false; \
      }

      READ_DWORD(RVA);
      READ_DWORD(size);
      READ_DWORD(codepage);
      READ_DWORD(reserved);

#undef READ_DWORD

      rsrc.codepage = rdat.codepage;

      // The start address is (RVA - section virtual address); reject data
      // that would begin before the section or wrap around past 32 bits.
      if (rdat.RVA < virtaddr)
        return false;
      const ::uint32_t start = rdat.RVA - virtaddr;
      if (rdat.size > 0xFFFFFFFF - start)
        return false;

      rsrc.buf = splitBuffer(sectionData, start, start + rdat.size);
      if (!rsrc.buf)
        return false;

      rsrcs.push_back(rsrc);
    }
  }

  return true;
}
// Locate the section named ".rsrc" in secs and parse its resource directory
// into rsrcs. Only the first matching section is parsed. Returns false when b
// is NULL or when parsing that section fails; a file without a ".rsrc"
// section yields true with rsrcs untouched. fileBegin is not used.
bool getResources(bounded_buffer *b, bounded_buffer *fileBegin, list<section> secs, list<resource> &rsrcs) {
  if (b == NULL)
    return false;

  list<section>::iterator cur = secs.begin();
  while (cur != secs.end()) {
    section &candidate = *cur;
    ++cur;

    if (candidate.sectionName != ".rsrc")
      continue;

    // Because there should only be one .rsrc, its result is the final answer.
    return parse_resource(candidate.sectionData, 0, candidate.sec.VirtualAddress, NULL, rsrcs);
  }

  return true;
}
bool getSections( bounded_buffer *b,
bounded_buffer *fileBegin,
nt_header_32 &nthdr,
@ -358,6 +518,13 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
return NULL;
}
if(getResources(remaining, file, p->internal->secs, p->internal->rsrcs) == false) {
deleteBuffer(remaining);
deleteBuffer(p->fileBuffer);
delete p;
return NULL;
}
//get exports
data_directory exportDir =
p->peHeader.nt.OptionalHeader.DataDirectory[DIR_EXPORT];

View File

@ -41,6 +41,43 @@ typedef struct _bounded_buffer {
buffer_detail *detail;
} bounded_buffer;
// One resource found in the resource section, as handed to IterRsrc
// callbacks.
struct resource {
// Number of directory levels walked to reach this entry; parse_resource()
// uses it to decide whether an ID is the type (0), name (1) or lang (2).
boost::uint32_t depth;
// String forms of type/name/lang. Filled by parse_resource_id() only when the
// corresponding ID has its high bit set; otherwise left empty.
std::string type_str;
std::string name_str;
std::string lang_str;
// Raw ID values taken from the directory entries at each level.
boost::uint32_t type;
boost::uint32_t name;
boost::uint32_t lang;
// Codepage copied from the resource data entry.
boost::uint32_t codepage;
// Resource data, obtained with splitBuffer() on the section buffer.
bounded_buffer *buf;
};
// Numeric values for the pre-defined resource types. pepy's type_as_str
// method maps each of these to its name (without the RT_ prefix).
// http://msdn.microsoft.com/en-us/library/ms648009(v=vs.85).aspx
enum resource_type {
RT_CURSOR = 1,
RT_BITMAP = 2,
RT_ICON = 3,
RT_MENU = 4,
RT_DIALOG = 5,
RT_STRING = 6,
RT_FONTDIR = 7,
RT_FONT = 8,
RT_ACCELERATOR = 9,
RT_RCDATA = 10,
RT_MESSAGETABLE = 11,
RT_GROUP_CURSOR = 12, // MAKEINTRESOURCE((ULONG_PTR)(RT_CURSOR) + 11)
RT_GROUP_ICON = 14, // MAKEINTRESOURCE((ULONG_PTR)(RT_ICON) + 11)
RT_VERSION = 16,
RT_DLGINCLUDE = 17,
RT_PLUGPLAY = 19,
RT_VXD = 20,
RT_ANICURSOR = 21,
RT_ANIICON = 22,
RT_HTML = 23,
RT_MANIFEST = 24
};
bool readByte(bounded_buffer *b, boost::uint32_t offset, boost::uint8_t &out);
bool readWord(bounded_buffer *b, boost::uint32_t offset, boost::uint16_t &out);
bool readDword(bounded_buffer *b, boost::uint32_t offset, boost::uint32_t &out);
@ -68,6 +105,10 @@ parsed_pe *ParsePEFromFile(const char *filePath);
//destruct a PE context
void DestructParsedPE(parsed_pe *pe);
//iterate over the resources
typedef int (*iterRsrc)(void *, resource);
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);
//iterate over the imports by RVA and string
typedef int (*iterVAStr)(void *, VA, std::string &, std::string &);
void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd);

View File

@ -32,6 +32,7 @@ The **parsed** object has a number of methods:
* get_imports: Return a list of import objects
* get_exports: Return a list of export objects
* get_relocations: Return a list of relocation objects
* get_resources: Return a list of resource objects
The **parsed** object has a number of attributes:
@ -79,10 +80,10 @@ ep = p.get_entry_point()
print "Entry point: 0x%x" % ep
```
The *get_sections*, *get_imports*, *get_exports* and *get_relocations* methods
each return a list of objects. The type of object depends upon the method
called. *get_sections* returns a list of **section** objects, *get_imports*
returns a list of **import** objects, etc.
The *get_sections*, *get_imports*, *get_exports*, *get_relocations* and
*get_resources* methods each return a list of objects. The type of object
depends upon the method called. *get_sections* returns a list of **section**
objects, *get_imports* returns a list of **import** objects, etc.
Section Object
--------------
@ -120,6 +121,58 @@ The **relocation** object has the following attributes:
* type
* addr
Resource Object
---------------
The **resource** object has the following attributes:
* type_str
* name_str
* lang_str
* type
* name
* lang
* codepage
* data
The **resource** object has the following methods:
* type_as_str
Resources are stored in a directory structure. The first three levels of the
directory are called **type**, **name** and **lang**. Each of these levels can have
either a pre-defined value or a custom string. The pre-defined values are
stored in the *type*, *name* and *lang* attributes. If a custom string is
found it will be stored in the *type_str*, *name_str* and *lang_str*
attributes. The *type_as_str* method can be used to convert a pre-defined
type value to a string representation.
The following code shows how to iterate through resources:
```
import pepy
from hashlib import md5
p = pepy.parse(sys.argv[1])
resources = p.get_resources()
print "Resources: (%i)" % len(resources)
for resource in resources:
print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
if resource.type_str:
print "\tType string: %s" % resource.type_str
else:
print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
if resource.name_str:
print "\tName string: %s" % resource.name_str
else:
print "\tName: %s" % hex(resource.name)
if resource.lang_str:
print "\tLang string: %s" % resource.lang_str
else:
print "\tLang: %s" % hex(resource.lang)
print "\tCodepage: %s" % hex(resource.codepage)
```
Authors
=======
pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com)

View File

@ -67,6 +67,18 @@ typedef struct {
PyObject *data;
} pepy_section;
/*
 * Python object wrapping one parsed resource. Each field is a Python object
 * built in resource_callback() from the like-named field of the C++
 * resource struct; data is built from the resource's buffer by
 * pepy_data_converter(). All fields are released in pepy_resource_dealloc().
 */
typedef struct {
PyObject_HEAD
PyObject *type_str;
PyObject *name_str;
PyObject *lang_str;
PyObject *type;
PyObject *name;
PyObject *lang;
PyObject *codepage;
PyObject *data;
} pepy_resource;
typedef struct {
PyObject_HEAD
PyObject *name;
@ -335,6 +347,7 @@ static void pepy_section_dealloc(pepy_section *self) {
Py_XDECREF(self->numrelocs);
Py_XDECREF(self->numlinenums);
Py_XDECREF(self->characteristics);
Py_XDECREF(self->data);
self->ob_type->tp_free((PyObject *) self);
}
@ -403,6 +416,192 @@ static PyTypeObject pepy_section_type = {
pepy_section_new /* tp_new */
};
/*
 * tp_new for pepy.resource: allocate an instance through the type's
 * allocator. args and kwds are ignored; the fields are filled in later.
 * Returns NULL when the allocation fails.
 */
static PyObject *pepy_resource_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
  pepy_resource *created = (pepy_resource *) type->tp_alloc(type, 0);

  return (PyObject *) created;
}
/*
 * tp_init for pepy.resource. args must be a tuple of exactly eight objects
 * in this order: type_str, name_str, lang_str, type, name, lang, codepage,
 * data. kwds is ignored. Returns 0 on success and -1 (with an exception set
 * by PyArg_ParseTuple) when the arguments do not match.
 *
 * The "O" format hands back borrowed references, while pepy_resource_dealloc
 * releases every field. Each object is therefore given its own reference
 * before it is stored, and whatever the field held before (for example from
 * an earlier __init__ call) is released.
 */
static int pepy_resource_init(pepy_resource *self, PyObject *args, PyObject *kwds) {
  PyObject *parsed[8];
  PyObject **fields[8] = {
    &self->type_str, &self->name_str, &self->lang_str,
    &self->type, &self->name, &self->lang,
    &self->codepage, &self->data
  };
  int i;

  /* Parse into temporaries so a failed parse leaves self untouched. */
  if (!PyArg_ParseTuple(args, "OOOOOOOO:pepy_resource_init", &parsed[0], &parsed[1], &parsed[2], &parsed[3], &parsed[4], &parsed[5], &parsed[6], &parsed[7]))
    return -1;

  for (i = 0; i < 8; i++) {
    PyObject *previous = *fields[i];

    /* Take the new reference before dropping the old one in case both are
     * the same object. */
    Py_INCREF(parsed[i]);
    *fields[i] = parsed[i];
    Py_XDECREF(previous);
  }

  return 0;
}
/*
 * tp_dealloc for pepy.resource: drop the reference held by every field
 * (fields that were never set are skipped) and then free the object itself.
 */
static void pepy_resource_dealloc(pepy_resource *self) {
  PyObject *held[] = {
    self->type_str, self->name_str, self->lang_str,
    self->type, self->name, self->lang,
    self->codepage, self->data
  };
  const size_t count = sizeof(held) / sizeof(held[0]);
  size_t idx;

  for (idx = 0; idx < count; ++idx) {
    Py_XDECREF(held[idx]);
  }

  self->ob_type->tp_free((PyObject *) self);
}
/*
 * One PEPY_OBJECT_GET expansion per field of pepy_resource. The macro is
 * defined outside this view; presumably each expansion defines the getter
 * that the matching OBJECTGETTER entry in pepy_resource_getseters refers
 * to -- confirm against the macro definition.
 */
PEPY_OBJECT_GET(resource, type_str)
PEPY_OBJECT_GET(resource, name_str)
PEPY_OBJECT_GET(resource, lang_str)
PEPY_OBJECT_GET(resource, type)
PEPY_OBJECT_GET(resource, name)
PEPY_OBJECT_GET(resource, lang)
PEPY_OBJECT_GET(resource, codepage)
PEPY_OBJECT_GET(resource, data)
static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
PyObject *ret;
char *str;
long type;
type = PyInt_AsLong(((pepy_resource *) self)->type);
if (type == -1) {
if (PyErr_Occurred()) {
PyErr_PrintEx(0);
return NULL;
}
}
switch ((resource_type) type) {
case(RT_CURSOR):
str = (char *) "CURSOR";
break;
case(RT_BITMAP):
str = (char *) "BITMAP";
break;
case(RT_ICON):
str = (char *) "ICON";
break;
case(RT_MENU):
str = (char *) "MENU";
break;
case(RT_DIALOG):
str = (char *) "DIALOG";
break;
case(RT_STRING):
str = (char *) "STRING";
break;
case(RT_FONTDIR):
str = (char *) "FONTDIR";
break;
case(RT_FONT):
str = (char *) "FONT";
break;
case(RT_ACCELERATOR):
str = (char *) "ACCELERATOR";
break;
case(RT_RCDATA):
str = (char *) "RCDATA";
break;
case(RT_MESSAGETABLE):
str = (char *) "MESSAGETABLE";
break;
case(RT_GROUP_CURSOR):
str = (char *) "GROUP_CURSOR";
break;
case(RT_GROUP_ICON):
str = (char *) "GROUP_ICON";
break;
case(RT_VERSION):
str = (char *) "VERSION";
break;
case(RT_DLGINCLUDE):
str = (char *) "DLGINCLUDE";
break;
case(RT_PLUGPLAY):
str = (char *) "PLUGPLAY";
break;
case(RT_VXD):
str = (char *) "VXD";
break;
case(RT_ANICURSOR):
str = (char *) "ANICURSOR";
break;
case(RT_ANIICON):
str = (char *) "ANIICON";
break;
case(RT_HTML):
str = (char *) "HTML";
break;
case(RT_MANIFEST):
str = (char *) "MANIFEST";
break;
default:
str = (char *) "UNKNOWN";
break;
}
ret = PyString_FromString(str);
if (!ret) {
PyErr_SetString(pepy_error, "Unable to create return string.");
return NULL;
}
return ret;
}
/* Methods available on pepy.resource objects; { NULL } ends the table. */
static PyMethodDef pepy_resource_methods[] = {
{ "type_as_str", pepy_resource_type_as_str, METH_NOARGS,
"Return the resource type as a string." },
{ NULL }
};
/*
 * Attributes available on pepy.resource objects, one per field of
 * pepy_resource. Entries are built with OBJECTGETTER (defined outside this
 * view); { NULL } ends the table.
 */
static PyGetSetDef pepy_resource_getseters[] = {
OBJECTGETTER(resource, type_str, "Type string"),
OBJECTGETTER(resource, name_str, "Name string"),
OBJECTGETTER(resource, lang_str, "Lang string"),
OBJECTGETTER(resource, type, "Type"),
OBJECTGETTER(resource, name, "Name"),
OBJECTGETTER(resource, lang, "Language"),
OBJECTGETTER(resource, codepage, "Codepage"),
OBJECTGETTER(resource, data, "Resource data"),
{ NULL }
};
/*
 * Type object for pepy.resource. It wires up pepy_resource_new,
 * pepy_resource_init and pepy_resource_dealloc together with the method and
 * attribute tables above; every other slot is left at 0.
 */
static PyTypeObject pepy_resource_type = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"pepy.resource", /* tp_name */
sizeof(pepy_resource), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) pepy_resource_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"pepy resource object", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
pepy_resource_methods, /* tp_methods */
0, /* tp_members */
pepy_resource_getseters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc) pepy_resource_init, /* tp_init */
0, /* tp_alloc */
pepy_resource_new /* tp_new */
};
static PyObject *pepy_parsed_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
pepy_parsed *self;
@ -499,7 +698,8 @@ static PyObject *pepy_parsed_get_bytes(PyObject *self, PyObject *args) {
return ret;
}
static PyObject *pepy_section_data_converter(bounded_buffer *data) {
/* This is used to convert bounded buffers into python byte array objects. */
static PyObject *pepy_data_converter(bounded_buffer *data) {
PyObject* ret;
ret = PyByteArray_FromStringAndSize((const char *) data->buf, data->bufLen);
@ -523,8 +723,7 @@ int section_callback(void *cbd, VA base, std::string &name, image_section_header
tuple = Py_BuildValue("sKKIIHHIO&", name.c_str(), base, data->bufLen,
s.VirtualAddress, s.Misc.VirtualSize,
s.NumberOfRelocations, s.NumberOfLinenumbers,
s.Characteristics, pepy_section_data_converter,
data);
s.Characteristics, pepy_data_converter, data);
if (!tuple)
return 1;
@ -560,6 +759,51 @@ static PyObject *pepy_parsed_get_sections(PyObject *self, PyObject *args) {
return ret;
}
int resource_callback(void *cbd, resource r) {
PyObject *rsrc;
PyObject *tuple;
PyObject *list = (PyObject *) cbd;
/*
* The tuple item order is important here. It is passed into the
* section type initialization and parsed there.
*/
tuple = Py_BuildValue("s#s#s#IIIIO&", r.type_str.c_str(), r.type_str.length(), r.name_str.c_str(), r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), r.type, r.name, r.lang, r.codepage, pepy_data_converter, r.buf);
if (!tuple)
return 1;
rsrc = pepy_resource_new(&pepy_resource_type, NULL, NULL);
if (!rsrc) {
Py_DECREF(tuple);
return 1;
}
if (pepy_resource_init((pepy_resource *) rsrc, tuple, NULL) == -1) {
PyErr_SetString(pepy_error, "Unable to init new resource");
return 1;
}
if (PyList_Append(list, rsrc) == -1) {
Py_DECREF(tuple);
Py_DECREF(rsrc);
return 1;
}
return 0;
}
/*
 * parsed.get_resources(): return a new list with one pepy.resource object
 * per resource of the parsed PE. args is unused. Returns NULL with an
 * exception set if the list cannot be created or if building any of the
 * resource objects failed.
 */
static PyObject *pepy_parsed_get_resources(PyObject *self, PyObject *args) {
  PyObject *ret = PyList_New(0);
  if (!ret) {
    PyErr_SetString(pepy_error, "Unable to create new list.");
    return NULL;
  }

  IterRsrc(((pepy_parsed *) self)->pe, resource_callback, ret);

  /* IterRsrc has no return value, so a callback that aborted the iteration
   * is only visible through the pending exception. Do not hand back a
   * partial list alongside it. */
  if (PyErr_Occurred()) {
    Py_DECREF(ret);
    return NULL;
  }

  return ret;
}
int import_callback(void *cbd, VA addr, std::string &name, std::string &sym) {
PyObject *imp;
PyObject *tuple;
@ -789,6 +1033,8 @@ static PyMethodDef pepy_parsed_methods[] = {
"Return a list of export objects." },
{ "get_relocations", pepy_parsed_get_relocations, METH_NOARGS,
"Return a list of relocation objects." },
{ "get_resources", pepy_parsed_get_resources, METH_NOARGS,
"Return a list of resource objects." },
{ NULL }
};
@ -868,7 +1114,8 @@ PyMODINIT_FUNC initpepy(void) {
PyType_Ready(&pepy_section_type) < 0 ||
PyType_Ready(&pepy_import_type) < 0 ||
PyType_Ready(&pepy_export_type) < 0 ||
PyType_Ready(&pepy_relocation_type) < 0)
PyType_Ready(&pepy_relocation_type) < 0 ||
PyType_Ready(&pepy_resource_type) < 0)
return;
m = Py_InitModule3("pepy", pepy_methods, "Python interface to pe-parse.");
@ -894,6 +1141,9 @@ PyMODINIT_FUNC initpepy(void) {
Py_INCREF(&pepy_relocation_type);
PyModule_AddObject(m, "pepy_relocation", (PyObject *) &pepy_relocation_type);
Py_INCREF(&pepy_resource_type);
PyModule_AddObject(m, "pepy_resource", (PyObject *) &pepy_resource_type);
PyModule_AddStringMacro(m, PEPY_VERSION);
PyModule_AddIntMacro(m, MZ_MAGIC);

View File

@ -69,3 +69,20 @@ relocations = p.get_relocations()
print "Relocations: (%i)" % len(relocations)
for reloc in relocations:
print "[+] Type: %s (%s)" % (reloc.type, hex(reloc.addr))
resources = p.get_resources()
print "Resources: (%i)" % len(resources)
for resource in resources:
print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
if resource.type_str:
print "\tType string (%i): %s" % (len(resource.type_str), resource.type_str)
else:
print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
if resource.name_str:
print "\tName string (%i): %s" % (len(resource.name_str), resource.name_str)
else:
print "\tName: %s" % hex(resource.name)
if resource.lang_str:
print "\tLang string (%i): %s" % (len(resource.name_str), resource.lang_str)
else:
print "\tLang: %s" % hex(resource.lang)
print "\tCodepage: %s" % hex(resource.codepage)