Symbol table parsing

This commit is contained in:
Jozef Kolek 2017-03-16 22:18:08 +01:00
parent 6e1e2a99b5
commit 326ca41bc6
5 changed files with 539 additions and 0 deletions

View File

@ -81,6 +81,122 @@ int printRelocs(void *N, VA relocAddr, reloc_type type) {
return 0 ;
}
int printSymbols(void *N, std::string &strName, uint32_t &value,
int16_t &sectionNumber, uint16_t &type, uint8_t &storageClass,
uint8_t &numberOfAuxSymbols) {
cout << "Symbol Name: " << strName << endl;
cout << "Symbol Value: 0x" << to_string<uint32_t>(value, hex) << endl;
cout << "Symbol Section Number: ";
switch (sectionNumber) {
case IMAGE_SYM_UNDEFINED:
cout << "UNDEFINED";
break;
case IMAGE_SYM_ABSOLUTE:
cout << "ABSOLUTE";
break;
case IMAGE_SYM_DEBUG:
cout << "DEBUG";
break;
default:
cout << sectionNumber;
break;
}
cout << endl;
cout << "Symbol Type: ";
switch (type) {
case IMAGE_SYM_TYPE_NULL:
cout << "NULL";
break;
case IMAGE_SYM_TYPE_VOID:
cout << "VOID";
break;
case IMAGE_SYM_TYPE_CHAR:
cout << "CHAR";
break;
case IMAGE_SYM_TYPE_SHORT:
cout << "SHORT";
break;
case IMAGE_SYM_TYPE_INT:
cout << "INT";
break;
case IMAGE_SYM_TYPE_LONG:
cout << "LONG";
break;
case IMAGE_SYM_TYPE_FLOAT:
cout << "FLOAT";
break;
case IMAGE_SYM_TYPE_DOUBLE:
cout << "DOUBLE";
break;
case IMAGE_SYM_TYPE_STRUCT:
cout << "STRUCT";
break;
case IMAGE_SYM_TYPE_UNION:
cout << "UNION";
break;
case IMAGE_SYM_TYPE_ENUM:
cout << "ENUM";
break;
case IMAGE_SYM_TYPE_MOE:
cout << "IMAGE_SYM_TYPE_MOE";
break;
case IMAGE_SYM_TYPE_BYTE:
cout << "BYTE";
break;
case IMAGE_SYM_TYPE_WORD:
cout << "WORD";
break;
case IMAGE_SYM_TYPE_UINT:
cout << "UINT";
break;
case IMAGE_SYM_TYPE_DWORD:
cout << "DWORD";
break;
}
cout << endl;
cout << "Symbol Storage Class: ";
switch (storageClass) {
case IMAGE_SYM_CLASS_END_OF_FUNCTION:
cout << "FUNCTION";
break;
case IMAGE_SYM_CLASS_NULL:
cout << "NULL";
break;
case IMAGE_SYM_CLASS_AUTOMATIC:
cout << "AUTOMATIC";
break;
case IMAGE_SYM_CLASS_EXTERNAL:
cout << "EXTERNAL";
break;
case IMAGE_SYM_CLASS_STATIC:
cout << "STATIC";
break;
case IMAGE_SYM_CLASS_REGISTER:
cout << "REGISTER";
break;
case IMAGE_SYM_CLASS_EXTERNAL_DEF:
cout << "EXTERNAL DEF";
break;
case IMAGE_SYM_CLASS_LABEL:
cout << "LABEL";
break;
case IMAGE_SYM_CLASS_UNDEFINED_LABEL:
cout << "UNDEFINED LABEL";
break;
case IMAGE_SYM_CLASS_MEMBER_OF_STRUCT:
cout << "MEMBER OF STRUCT";
break;
}
cout << endl;
cout << "Symbol Number of Aux Symbols: " << (uint32_t) numberOfAuxSymbols << endl;
return 0 ;
}
int printRsrc(void *N,
resource r)
{
@ -200,6 +316,8 @@ int main(int argc, char *argv[]) {
IterImpVAString(p, printImports, NULL);
cout << "Relocations: " << endl;
IterRelocs(p, printRelocs, NULL);
cout << "Symbols (symbol table): " << endl;
IterSymbols(p, printSymbols, NULL);
cout << "Sections: " << endl;
IterSec(p, printSecs, NULL);
cout << "Exports: " << endl;

View File

@ -24,6 +24,7 @@ THE SOFTWARE.
#include <fstream>
#include <string.h>
#include <unistd.h>
#include "parse.h"
#ifdef WIN32

View File

@ -38,6 +38,7 @@ const std::uint16_t NUM_DIR_ENTRIES = 16;
const std::uint16_t NT_OPTIONAL_32_MAGIC = 0x10B;
const std::uint16_t NT_OPTIONAL_64_MAGIC = 0x20B;
// Length in bytes of an inline (short) name field; the symbol table parser
// uses it as the size of a symbol's short name.
const std::uint16_t NT_SHORT_NAME_LEN = 8;
// Size in bytes of one symbol table record. Auxiliary records occupy the same
// amount of space, which is how the parser steps over records it does not
// decode.
const std::uint16_t SYMTAB_RECORD_LEN = 18;
const std::uint16_t DIR_EXPORT = 0;
const std::uint16_t DIR_IMPORT = 1;
const std::uint16_t DIR_RESOURCE = 2;
@ -93,6 +94,62 @@ const std::uint32_t IMAGE_SCN_MEM_EXECUTE = 0x20000000;
const std::uint32_t IMAGE_SCN_MEM_READ = 0x40000000;
const std::uint32_t IMAGE_SCN_MEM_WRITE = 0x80000000;
// Symbol section number values.
// The section number of a symbol is signed: positive values refer to a
// section, the values below are the special (non-section) markers.
const std::int16_t IMAGE_SYM_UNDEFINED = 0;
const std::int16_t IMAGE_SYM_ABSOLUTE = -1;
const std::int16_t IMAGE_SYM_DEBUG = -2;

// Symbol table types (base type of a symbol)
const std::uint16_t IMAGE_SYM_TYPE_NULL = 0;
const std::uint16_t IMAGE_SYM_TYPE_VOID = 1;
const std::uint16_t IMAGE_SYM_TYPE_CHAR = 2;
const std::uint16_t IMAGE_SYM_TYPE_SHORT = 3;
const std::uint16_t IMAGE_SYM_TYPE_INT = 4;
const std::uint16_t IMAGE_SYM_TYPE_LONG = 5;
const std::uint16_t IMAGE_SYM_TYPE_FLOAT = 6;
const std::uint16_t IMAGE_SYM_TYPE_DOUBLE = 7;
const std::uint16_t IMAGE_SYM_TYPE_STRUCT = 8;
const std::uint16_t IMAGE_SYM_TYPE_UNION = 9;
const std::uint16_t IMAGE_SYM_TYPE_ENUM = 10;
const std::uint16_t IMAGE_SYM_TYPE_MOE = 11;
const std::uint16_t IMAGE_SYM_TYPE_BYTE = 12;
const std::uint16_t IMAGE_SYM_TYPE_WORD = 13;
const std::uint16_t IMAGE_SYM_TYPE_UINT = 14;
const std::uint16_t IMAGE_SYM_TYPE_DWORD = 15;

// Symbol table derived (complex) types
const std::uint16_t IMAGE_SYM_DTYPE_NULL = 0;
const std::uint16_t IMAGE_SYM_DTYPE_POINTER = 1;
const std::uint16_t IMAGE_SYM_DTYPE_FUNCTION = 2;
const std::uint16_t IMAGE_SYM_DTYPE_ARRAY = 3;

// Symbol table storage classes
// END_OF_FUNCTION is the byte value 0xFF. It is written out explicitly rather
// than as -1 so the constant does not depend on an implicit signed-to-unsigned
// conversion (which compilers warn about).
const std::uint8_t IMAGE_SYM_CLASS_END_OF_FUNCTION = 0xFF;
const std::uint8_t IMAGE_SYM_CLASS_NULL = 0;
const std::uint8_t IMAGE_SYM_CLASS_AUTOMATIC = 1;
const std::uint8_t IMAGE_SYM_CLASS_EXTERNAL = 2;
const std::uint8_t IMAGE_SYM_CLASS_STATIC = 3;
const std::uint8_t IMAGE_SYM_CLASS_REGISTER = 4;
const std::uint8_t IMAGE_SYM_CLASS_EXTERNAL_DEF = 5;
const std::uint8_t IMAGE_SYM_CLASS_LABEL = 6;
const std::uint8_t IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7;
const std::uint8_t IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8;
const std::uint8_t IMAGE_SYM_CLASS_ARGUMENT = 9;
const std::uint8_t IMAGE_SYM_CLASS_STRUCT_TAG = 10;
const std::uint8_t IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11;
const std::uint8_t IMAGE_SYM_CLASS_UNION_TAG = 12;
const std::uint8_t IMAGE_SYM_CLASS_TYPE_DEFINITION = 13;
const std::uint8_t IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14;
const std::uint8_t IMAGE_SYM_CLASS_ENUM_TAG = 15;
const std::uint8_t IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16;
const std::uint8_t IMAGE_SYM_CLASS_REGISTER_PARAM = 17;
const std::uint8_t IMAGE_SYM_CLASS_BIT_FIELD = 18;
const std::uint8_t IMAGE_SYM_CLASS_BLOCK = 100;
const std::uint8_t IMAGE_SYM_CLASS_FUNCTION = 101;
const std::uint8_t IMAGE_SYM_CLASS_END_OF_STRUCT = 102;
const std::uint8_t IMAGE_SYM_CLASS_FILE = 103;
const std::uint8_t IMAGE_SYM_CLASS_SECTION = 104;
const std::uint8_t IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105;
const std::uint8_t IMAGE_SYM_CLASS_CLR_TOKEN = 107;
struct dos_header {
std::uint16_t e_magic;
std::uint16_t e_cblp;

View File

@ -58,12 +58,68 @@ struct reloc {
reloc_type type;
};
// Offset of a long symbol name inside the string table: the upper 32 bits of
// the 8-byte name field. `sn` is any expression yielding an object with a
// 64-bit `data` member (e.g. a symbol_name).
#define SYMBOL_NAME_OFFSET(sn) (static_cast<uint32_t>((sn).data >> 32))
// Upper byte of the 16-bit `type` member of `x`.
// The argument is parenthesized in both macros so that expressions such as
// `*ptr` expand correctly.
#define SYMBOL_TYPE_HI(x) ((x).type >> 8)
// The 8-byte name field of a symbol table record, viewed three ways.
// getSymbolTable reads the whole field into `data`, then checks `zeroes`:
// when it is 0 the name lives in the string table at SYMBOL_NAME_OFFSET(...),
// otherwise the name is taken from `shortName`.
union symbol_name {
// Inline name bytes
uint8_t shortName[NT_SHORT_NAME_LEN];
// Zero when the name is stored in the string table
uint32_t zeroes;
// The complete field as read from the file
uint64_t data;
};
// Auxiliary record, format 1 (function definitions).
// The four fields are read as consecutive 32-bit values from the record.
struct aux_symbol_f1 {
uint32_t tagIndex;
uint32_t totalSize;
uint32_t pointerToLineNumber;
uint32_t pointerToNextFunction;
};
// Auxiliary record, format 2 (.bf and .ef symbols).
// Only the fields kept here are read from the record; the parser skips the
// unused bytes around them.
struct aux_symbol_f2 {
uint16_t lineNumber;
uint32_t pointerToNextFunction;
};
// Auxiliary record, format 3 (weak externals).
struct aux_symbol_f3 {
uint32_t tagIndex;
uint32_t characteristics;
};
// Auxiliary record, format 4 (files).
struct aux_symbol_f4 {
// Raw bytes of the record (one full symbol table record)
uint8_t filename[SYMTAB_RECORD_LEN];
// The same bytes collected into a string by the parser
string strFilename;
};
// Auxiliary record, format 5 (section definitions).
struct aux_symbol_f5 {
uint32_t length;
uint16_t numberOfRelocations;
uint16_t numberOfLineNumbers;
uint32_t checkSum;
uint16_t number;
uint8_t selection;
};
// One entry of the symbol table as produced by getSymbolTable.
struct symbol {
// Resolved name: taken from the inline short name or from the string table
string strName;
// Raw 8-byte name field as read from the file
symbol_name name;
uint32_t value;
// Signed: compared against IMAGE_SYM_UNDEFINED / ABSOLUTE / DEBUG
int16_t sectionNumber;
uint16_t type;
// One of the IMAGE_SYM_CLASS_* values
uint8_t storageClass;
// Number of auxiliary records that follow this symbol in the table
uint8_t numberOfAuxSymbols;
// Auxiliary records, one list per record format; the parser fills at most
// one of these lists for a given symbol.
list<aux_symbol_f1> aux_symbols_f1;
list<aux_symbol_f2> aux_symbols_f2;
list<aux_symbol_f3> aux_symbols_f3;
list<aux_symbol_f4> aux_symbols_f4;
list<aux_symbol_f5> aux_symbols_f5;
};
// Parser-private storage for everything extracted from a PE file. Each list
// holds the parsed entries of one kind.
struct parsed_pe_internal {
list<section> secs;
list<resource> rsrcs;
list<importent> imports;
list<reloc> relocs;
list<exportent> exports;
// Symbol table entries; appended by getSymbolTable and walked by IterSymbols
list<symbol> symbols;
};
::uint32_t err = 0;
@ -614,6 +670,285 @@ bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) {
return true;
}
// Parse the COFF symbol table referenced by the file header and append every
// symbol, together with its auxiliary records, to p->internal->symbols.
//
// The table consists of NumberOfSymbols records of SYMTAB_RECORD_LEN (18)
// bytes each, followed directly by the string table. Auxiliary records are
// ordinary table slots and are included in NumberOfSymbols.
//
// Returns true if the file has no symbol table (PointerToSymbolTable == 0) or
// if the whole table was read. Returns false, after reporting
// PE_ERR(PEERR_MAGIC), as soon as a read from p->fileBuffer fails.
bool getSymbolTable(parsed_pe *p) {
  if (p->peHeader.nt.FileHeader.PointerToSymbolTable == 0) {
    return true;
  }

  const uint32_t numberOfSymbols = p->peHeader.nt.FileHeader.NumberOfSymbols;

  // The string table starts right after the last symbol record
  uint32_t strTableOffset = p->peHeader.nt.FileHeader.PointerToSymbolTable +
                            (numberOfSymbols * SYMTAB_RECORD_LEN);

  uint32_t offset = p->peHeader.nt.FileHeader.PointerToSymbolTable;

  // 64-bit counter: it is advanced by the auxiliary record count below and
  // must not wrap around for a hostile NumberOfSymbols value.
  for (uint64_t i = 0; i < numberOfSymbols; i++) {
    symbol sym;

    // Read name
    if (readQword(p->fileBuffer, offset, sym.name.data) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }

    if (sym.name.zeroes == 0) {
      // The symbol name is greater than 8 bytes so it is stored in the string
      // table. In this case instead of name, an offset of the string in the
      // string table is provided.
      uint32_t strOffset = strTableOffset + SYMBOL_NAME_OFFSET(sym.name);
      uint8_t ch;
      for (;;) {
        if (readByte(p->fileBuffer, strOffset, ch) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (!ch) {
          break;
        }
        sym.strName.push_back(static_cast<char>(ch));
        strOffset += sizeof(uint8_t);
      }
    } else {
      // Short names are NUL padded up to 8 bytes (a full 8-byte name has no
      // terminator); do not copy the padding into the string.
      for (uint8_t n = 0;
           n < NT_SHORT_NAME_LEN && sym.name.shortName[n] != 0; n++) {
        sym.strName.push_back(static_cast<char>(sym.name.shortName[n]));
      }
    }
    offset += sizeof(uint64_t);

    // Read value
    if (readDword(p->fileBuffer, offset, sym.value) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }
    offset += sizeof(uint32_t);

    // Read section number
    uint16_t secNum;
    if (readWord(p->fileBuffer, offset, secNum) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }
    sym.sectionNumber = static_cast<int16_t>(secNum);
    offset += sizeof(uint16_t);

    // Read type
    if (readWord(p->fileBuffer, offset, sym.type) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }
    offset += sizeof(uint16_t);

    // Read storage class
    if (readByte(p->fileBuffer, offset, sym.storageClass) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }
    offset += sizeof(uint8_t);

    // Read number of auxiliary symbols
    if (readByte(p->fileBuffer, offset, sym.numberOfAuxSymbols) == false) {
      PE_ERR(PEERR_MAGIC);
      return false;
    }

    // Set offset to next record
    offset += sizeof(uint8_t);

    // Auxiliary records are table slots of their own and are counted in
    // NumberOfSymbols, so they consume loop iterations too.
    i += sym.numberOfAuxSymbols;

    // The derived type lives in bits 4-5 of the type field, so a function
    // symbol has type 0x20.
    const bool isFunction =
        ((sym.type >> 4) & 0x3) == IMAGE_SYM_DTYPE_FUNCTION;

    // Read auxiliary symbol records. Every record is SYMTAB_RECORD_LEN bytes
    // long; fields are read relative to the record start and the offset is
    // then moved to the next record, whatever the format.
    if (sym.numberOfAuxSymbols == 0) {
      // Nothing to read
    } else if (sym.storageClass == IMAGE_SYM_CLASS_EXTERNAL && isFunction &&
               sym.sectionNumber > 0) {
      // Auxiliary Format 1: Function Definitions
      //   0: tag index (4)          4: total size (4)
      //   8: pointer to line no (4) 12: pointer to next function (4)
      //  16: unused (2)
      for (uint8_t n = 0; n < sym.numberOfAuxSymbols; n++) {
        const uint32_t rec = offset;
        aux_symbol_f1 asym;

        if (readDword(p->fileBuffer, rec, asym.tagIndex) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 4, asym.totalSize) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 8, asym.pointerToLineNumber) ==
            false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 12, asym.pointerToNextFunction) ==
            false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }

        sym.aux_symbols_f1.push_back(asym);
        offset = rec + SYMTAB_RECORD_LEN;
      }
    } else if (sym.storageClass == IMAGE_SYM_CLASS_FUNCTION) {
      // Auxiliary Format 2: .bf and .ef Symbols
      //   0: unused (4)   4: line number (2)   6: unused (6)
      //  12: pointer to next function (4)     16: unused (2)
      for (uint8_t n = 0; n < sym.numberOfAuxSymbols; n++) {
        const uint32_t rec = offset;
        aux_symbol_f2 asym;

        if (readWord(p->fileBuffer, rec + 4, asym.lineNumber) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 12, asym.pointerToNextFunction) ==
            false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }

        sym.aux_symbols_f2.push_back(asym);
        offset = rec + SYMTAB_RECORD_LEN;
      }
    } else if (sym.storageClass == IMAGE_SYM_CLASS_EXTERNAL &&
               sym.sectionNumber == IMAGE_SYM_UNDEFINED && sym.value == 0) {
      // Auxiliary Format 3: Weak Externals
      //   0: tag index (4)   4: characteristics (4)   8: unused (10)
      for (uint8_t n = 0; n < sym.numberOfAuxSymbols; n++) {
        const uint32_t rec = offset;
        aux_symbol_f3 asym;

        if (readDword(p->fileBuffer, rec, asym.tagIndex) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 4, asym.characteristics) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }

        sym.aux_symbols_f3.push_back(asym);
        offset = rec + SYMTAB_RECORD_LEN;
      }
    } else if (sym.storageClass == IMAGE_SYM_CLASS_FILE) {
      // Auxiliary Format 4: Files
      //   0: file name (18), NUL padded
      for (uint8_t n = 0; n < sym.numberOfAuxSymbols; n++) {
        const uint32_t rec = offset;
        aux_symbol_f4 asym;
        bool terminated = false;

        for (uint16_t j = 0; j < SYMTAB_RECORD_LEN; j++) {
          if (readByte(p->fileBuffer, rec + j, asym.filename[j]) == false) {
            PE_ERR(PEERR_MAGIC);
            return false;
          }
          // Keep the raw bytes in filename[], but leave the padding out of
          // the string form.
          if (asym.filename[j] == 0) {
            terminated = true;
          }
          if (!terminated) {
            asym.strFilename.push_back(static_cast<char>(asym.filename[j]));
          }
        }

        sym.aux_symbols_f4.push_back(asym);
        offset = rec + SYMTAB_RECORD_LEN;
      }
    } else if (sym.storageClass == IMAGE_SYM_CLASS_STATIC) {
      // Auxiliary Format 5: Section Definitions
      //   0: length (4)      4: number of relocations (2)
      //   6: number of line numbers (2)    8: checksum (4)
      //  12: number (2)     14: selection (1)   15: unused (3)
      for (uint8_t n = 0; n < sym.numberOfAuxSymbols; n++) {
        const uint32_t rec = offset;
        aux_symbol_f5 asym;

        if (readDword(p->fileBuffer, rec, asym.length) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readWord(p->fileBuffer, rec + 4, asym.numberOfRelocations) ==
            false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readWord(p->fileBuffer, rec + 6, asym.numberOfLineNumbers) ==
            false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readDword(p->fileBuffer, rec + 8, asym.checkSum) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readWord(p->fileBuffer, rec + 12, asym.number) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }
        if (readByte(p->fileBuffer, rec + 14, asym.selection) == false) {
          PE_ERR(PEERR_MAGIC);
          return false;
        }

        sym.aux_symbols_f5.push_back(asym);
        offset = rec + SYMTAB_RECORD_LEN;
      }
    } else {
      // Skip auxiliary records of an unknown type
      offset += sizeof(uint8_t) * SYMTAB_RECORD_LEN * sym.numberOfAuxSymbols;
    }

    // Save the symbol only now: the list stores a copy, so the auxiliary
    // records have to be attached before it is made.
    p->internal->symbols.push_back(sym);
  }

  return true;
}
parsed_pe *ParsePEFromFile(const char *filePath) {
//first, create a new parsed_pe structure
parsed_pe *p = new parsed_pe();
@ -1380,6 +1715,15 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
} while(true);
}
// Get symbol table
if (getSymbolTable(p) == false) {
deleteBuffer(remaining);
deleteBuffer(p->fileBuffer);
delete p;
return NULL;
}
deleteBuffer(remaining);
return p;
@ -1424,6 +1768,21 @@ void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) {
return;
}
// Walk the parsed symbol table in order and hand every entry to `cb`.
// `cbd` is passed through to the callback untouched. The walk ends early as
// soon as the callback returns a non-zero value.
void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) {
  list<symbol> &symbols = pe->internal->symbols;

  list<symbol>::iterator cur = symbols.begin();
  list<symbol>::iterator last = symbols.end();

  while (cur != last) {
    // The callback receives references into a private copy of the entry
    symbol entry = *cur;

    int rc = cb(cbd, entry.strName, entry.value, entry.sectionNumber,
                entry.type, entry.storageClass, entry.numberOfAuxSymbols);
    if (rc != 0) {
      return;
    }

    ++cur;
  }
}
//iterate over the exports by VA
void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) {
list<exportent> &l = pe->internal->exports;

View File

@ -191,6 +191,10 @@ void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd);
typedef int (*iterReloc)(void *, VA, reloc_type);
void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd);
// Iterate over symbols (symbol table) in the PE file.
// The callback receives, in order: the user data pointer given to
// IterSymbols, the symbol name, value, section number, type, storage class
// and number of auxiliary symbols. Returning a non-zero value stops the
// iteration.
typedef int (*iterSymbol)(void *, std::string &, uint32_t &, int16_t &, uint16_t &, uint8_t &, uint8_t &);
void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd);
//iterate over the exports
typedef int (*iterExp)(void *, VA, std::string &, std::string &);
void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd);