4
0
mirror of https://github.com/QuasarApp/pe-parse.git synced 2025-05-08 09:39:34 +00:00

Merge remote-tracking branch 'r/master'

This commit is contained in:
Andrei Yankovich 2019-10-16 12:01:56 +03:00
commit b69a5fdcbb
13 changed files with 397 additions and 17 deletions

@ -45,6 +45,12 @@ cmake --build . --config Release
cmake --build . --config Release --target install
```
PE files that have a Resource section store the strings for the resource Type in UTF-16, but `std::string` expects UTF-8, so some cross-platform conversion solution
is required. You can let cmake choose one it finds in your build environment, or you can choose one of the following options yourself and specify it with
the `-DUNICODE_LIBRARY` argument when generating the project files with cmake:
* `icu` (preferred) - "[ICU](http://site.icu-project.org/) is a mature, widely used set of C/C++ and Java libraries providing Unicode and Globalization support for software applications"
* `codecvt` - A C++ library header file ([now deprecated](http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0618r0.html)) supported by some C++ runtimes
### Notes about Windows
If you are building on Windows with Visual Studio, the generator option can be used to select the compiler version and the output architecture:
@ -57,6 +63,11 @@ cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release ..
cmake -G "Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=Release ..
```
Visual Studio 2015 or higher is required to use codecvt, but you also have the option of using [ICU](http://site.icu-project.org/). The easiest way to
get started with ICU in Windows is with [vcpkg](https://vcpkg.readthedocs.io/): `vcpkg install icu`. Then add the
`-DCMAKE_TOOLCHAIN_FILE=C:\src\vcpkg\scripts\buildsystems\vcpkg.cmake` argument when generating the project files with cmake to add the appropriate
library and include directories to the project.
Using the library
=======
Once the library is installed, linking to it is easy! Add the following lines in your CMake project:

@ -11,6 +11,7 @@ if (MSVC)
else ()
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
if (NOT MINGW)

@ -227,6 +227,17 @@ int printSymbols(void *N,
return 0;
}
// IterRich callback: writes one Rich header entry (product id, build
// number, resolved product name and use count) to stdout as a single line.
// The user-data pointer is unused. Always returns 0.
int printRich(void *N, rich_entry r) {
  static_cast<void>(N);

  const std::string &productName =
      GetRichProductName(r.ProductId, r.BuildNumber);

  std::cout << std::setw(10) << "ProdId:" << std::setw(7) << r.ProductId
            << std::setw(10) << "Build:" << std::setw(7) << r.BuildNumber
            << std::setw(10) << "Name:" << std::setw(20) << productName
            << std::setw(10) << "Count:" << std::setw(7) << r.Count << "\n";

  return 0;
}
int printRsrc(void *N, resource r) {
static_cast<void>(N);
@ -288,6 +299,13 @@ int main(int argc, char *argv[]) {
parsed_pe *p = ParsePEFromFile(argv[1]);
if (p != NULL) {
// Print Rich header info
if(p->peHeader.rich.isPresent) {
std::cout << "Rich header: present\n";
IterRich(p, printRich, NULL);
} else {
std::cout << "Rich header: not present\n";
}
// print out some things
DUMP_FIELD(Signature);
DUMP_FIELD(FileHeader.Machine);

@ -1,6 +1,15 @@
cmake_minimum_required(VERSION 3.7)
project(pe-parser-library)
# Which UTF-16 -> UTF-8 conversion backend to build with: "icu", "codecvt",
# or "any" (use ICU when it can be found, otherwise fall back to codecvt).
set(UNICODE_LIBRARY "any" CACHE STRING "Select a unicode library")
# Advertise the accepted values so cmake-gui/ccmake can offer them as a list.
set_property(CACHE UNICODE_LIBRARY PROPERTY STRINGS "any" "icu" "codecvt")
# This variable is used twice, so it is set once here at the top to prevent
# the two uses from getting out of sync.
# This is the minimum "required" version, but there is a good chance earlier
# versions of ICU support the simple functionality needed by this project.
set(ICU_MINIMUM_REQUIRED 55.0)
# List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...)
list(APPEND PEPARSERLIB_SOURCEFILES
include/parser-library/parse.h
@ -11,9 +20,57 @@ list(APPEND PEPARSERLIB_SOURCEFILES
src/parse.cpp
)
# Check for codecvt support.
#
# MSVC and GCC are decided by compiler version: the header is available
# starting with Visual Studio 2015 (MSVC_VERSION 1900) and GCC 5.0.
#
# Every other compiler is probed by actually compiling `#include <codecvt>`.
# A plain find_path() lookup is not reliable here because the header lives in
# the compiler's own C++ include directory, which find_path() does not search.
if (MSVC)
  if (MSVC_VERSION LESS 1900)
    set(CODECVT_SUPPORTED OFF)
  else ()
    set(CODECVT_SUPPORTED ON)
  endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
    set(CODECVT_SUPPORTED OFF)
  else ()
    set(CODECVT_SUPPORTED ON)
  endif ()
else ()
  include(CheckIncludeFileCXX)

  # <codecvt> is a C++11 header, so run the probe in C++11 mode.
  # CMAKE_CXX11_STANDARD_COMPILE_OPTION is empty for compilers that do not
  # need an explicit flag. The previous flags are restored afterwards so that
  # other checks are not affected.
  set(PEPARSE_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${CMAKE_CXX11_STANDARD_COMPILE_OPTION}")
  check_include_file_cxx("codecvt" CODECVT_HEADER_FOUND)
  set(CMAKE_REQUIRED_FLAGS "${PEPARSE_SAVED_REQUIRED_FLAGS}")

  if (CODECVT_HEADER_FOUND)
    set(CODECVT_SUPPORTED ON)
  else ()
    set(CODECVT_SUPPORTED OFF)
  endif ()
endif ()
# Select the source file that implements from_utf16() according to the
# UNICODE_LIBRARY option:
#   "icu"      - ICU is mandatory; configuration fails if it is missing.
#   "codecvt"  - use the <codecvt> header; an error is reported if unsupported.
#   otherwise  - ("any") prefer ICU when found, else fall back to codecvt.
#
# The option value is quoted and compared with STREQUAL so that an empty
# value does not produce a malformed if() and so that values that merely
# contain "icu"/"codecvt" as a substring are not accepted by accident.
if("${UNICODE_LIBRARY}" STREQUAL "icu")
  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED)
  add_definitions(-DUSE_ICU4C)
  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
elseif("${UNICODE_LIBRARY}" STREQUAL "codecvt")
  if(NOT CODECVT_SUPPORTED)
    message(SEND_ERROR "codecvt header not found")
  endif()
  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
else()
  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc)
  if(ICU_FOUND)
    add_definitions(-DUSE_ICU4C)
    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
  elseif(CODECVT_SUPPORTED)
    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
  else()
    message(SEND_ERROR "unable to find codecvt header or ICU library (hint: try installing libicu-dev)")
  endif()
endif()
# Build the parser library from the explicitly listed sources (including the
# unicode backend source chosen above).
add_library(${PROJECT_NAME} ${PEPARSERLIB_SOURCEFILES})
# Consumers of the library get the public include directory automatically.
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
# Link against ICU's "uc" component only when find_package(ICU) located it.
if(ICU_FOUND)
target_link_libraries(${PROJECT_NAME} ICU::uc)
endif()
install(TARGETS ${PROJECT_NAME}
RUNTIME DESTINATION "bin"

@ -26,6 +26,7 @@ THE SOFTWARE.
#include <cstdint>
#include <string>
#include <vector>
#define _offset(t, f) \
static_cast<std::uint32_t>( \
@ -36,6 +37,9 @@ THE SOFTWARE.
// some constant definitions
// clang-format off
namespace peparse {
// Rich header markers. Each signature value is a four-character ASCII tag
// packed into a DWORD with the first character in the lowest byte.
// "Rich": trailing signature that marks the end of the Rich header
constexpr std::uint32_t RICH_MAGIC_END = 0x68636952;
// "DanS": leading signature, as seen after XOR-decrypting with the key
constexpr std::uint32_t RICH_MAGIC_START = 0x536e6144;
// File offset at which the search for the Rich header starts
constexpr std::uint32_t RICH_OFFSET = 0x80;
constexpr std::uint16_t MZ_MAGIC = 0x5A4D;
constexpr std::uint32_t NT_MAGIC = 0x00004550;
constexpr std::uint16_t NUM_DIR_ENTRIES = 16;
@ -193,7 +197,7 @@ constexpr std::uint16_t IMAGE_SYM_DTYPE_FUNCTION = 2;
constexpr std::uint16_t IMAGE_SYM_DTYPE_ARRAY = 3;
// Symbol table storage classes
constexpr std::uint8_t IMAGE_SYM_CLASS_END_OF_FUNCTION = static_cast<const std::uint8_t>(-1);
constexpr std::uint8_t IMAGE_SYM_CLASS_END_OF_FUNCTION = static_cast<std::uint8_t>(-1);
constexpr std::uint8_t IMAGE_SYM_CLASS_NULL = 0;
constexpr std::uint8_t IMAGE_SYM_CLASS_AUTOMATIC = 1;
constexpr std::uint8_t IMAGE_SYM_CLASS_EXTERNAL = 2;
@ -338,6 +342,20 @@ struct nt_header_32 {
std::uint16_t OptionalMagic;
};
// One decoded entry of the Rich header.
struct rich_entry {
  // Product id: the high WORD of the entry's first decrypted DWORD
  std::uint16_t ProductId;
  // Build number: the low WORD of the entry's first decrypted DWORD
  std::uint16_t BuildNumber;
  // Use count: the entry's second decrypted DWORD
  std::uint32_t Count;
};

// Decoded Rich header of a PE file.
//
// All scalar members have default initializers so that a header that was
// never parsed (or whose parsing stopped early) reads as "not present"
// instead of holding indeterminate values.
struct rich_header {
  // Decrypted leading signature (RICH_MAGIC_START when a header was found)
  std::uint32_t StartSignature = 0;
  // Decoded entries, in the order they appear in the file
  std::vector<rich_entry> Entries;
  // Trailing signature as read from the file (RICH_MAGIC_END when valid)
  std::uint32_t EndSignature = 0;
  // XOR key that was used to decrypt the header
  std::uint32_t DecryptionKey = 0;
  // True when the leading signature was found
  bool isPresent = false;
};
/*
* This structure is only used to know how far to move the offset
* when parsing resources. The data is stored in a resource_dir_entry

@ -25,6 +25,7 @@ THE SOFTWARE.
#pragma once
#include <cstdint>
#include <map>
#include <string>
#include "nt-headers.h"
@ -142,6 +143,7 @@ bool readByte(bounded_buffer *b, std::uint32_t offset, std::uint8_t &out);
bool readWord(bounded_buffer *b, std::uint32_t offset, std::uint16_t &out);
bool readDword(bounded_buffer *b, std::uint32_t offset, std::uint32_t &out);
bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out);
bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out);
bounded_buffer *readFileToFileBuffer(const char *filePath);
bounded_buffer *
@ -152,6 +154,7 @@ uint64_t bufLen(bounded_buffer *b);
struct parsed_pe_internal;
// Header information extracted from a PE file.
typedef struct _pe_header {
// Rich header data; rich.isPresent tells whether one was found
rich_header rich;
// NT header data
nt_header_32 nt;
} pe_header;
@ -161,6 +164,11 @@ typedef struct _parsed_pe {
pe_header peHeader;
} parsed_pe;
// Resolve a Rich header product id / build number pair to a known
// product name
typedef std::pair<std::uint16_t, std::uint16_t> ProductKey;
const std::string& GetRichProductName(std::uint16_t prodId, std::uint16_t buildNum);
// get parser error status as integer
std::uint32_t GetPEErr();
@ -176,6 +184,10 @@ parsed_pe *ParsePEFromFile(const char *filePath);
// destruct a PE context
void DestructParsedPE(parsed_pe *p);
// iterate over Rich header entries
typedef int (*iterRich)(void *, rich_entry);
void IterRich(parsed_pe *pe, iterRich cb, void *cbd);
// iterate over the resources
typedef int (*iterRsrc)(void *, resource);
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);

@ -1,7 +1,15 @@
#pragma once
#include <string>
#include <sstream>
#ifdef USE_ICU4C
#include <unicode/unistr.h>
typedef std::basic_string<UChar> UCharString;
#else
typedef std::u16string UCharString;
#endif
namespace peparse {
template <class T>
static std::string to_string(T t, std::ios_base &(*f)(std::ios_base &) ) {
@ -9,4 +17,6 @@ static std::string to_string(T t, std::ios_base &(*f)(std::ios_base &) ) {
oss << f << t;
return oss.str();
}
std::string from_utf16(const UCharString &u);
} // namespace peparse

@ -164,6 +164,31 @@ bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out) {
return true;
}
// Reads one 16-bit code unit from `b` at byte position `offset` into `out`.
//
// Returns false and records PEERR_BUFFER when `b` is null, or PEERR_ADDRESS
// when the two bytes at [offset, offset + 1] are not both inside the buffer.
// When the buffer is flagged with `swapBytes`, the two bytes are exchanged
// before being stored in `out`.
bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out) {
  if (b == nullptr) {
    PE_ERR(PEERR_BUFFER);
    return false;
  }

  // Both bytes must be in range. The comparison is written without
  // `offset + 1` so that an offset close to UINT32_MAX cannot wrap around
  // and slip past the check.
  if (b->bufLen < 2 || offset > b->bufLen - 2) {
    PE_ERR(PEERR_ADDRESS);
    return false;
  }

  // Assemble the value byte by byte into a local. This avoids dereferencing
  // a possibly misaligned char16_t pointer into the buffer, and avoids
  // reading through a pointer to a temporary whose scope has already ended.
  const auto *src = b->buf + offset;
  char16_t value = 0;
  std::uint8_t *dst = reinterpret_cast<std::uint8_t *>(&value);
  if (b->swapBytes) {
    dst[0] = static_cast<std::uint8_t>(src[1]);
    dst[1] = static_cast<std::uint8_t>(src[0]);
  } else {
    dst[0] = static_cast<std::uint8_t>(src[0]);
    dst[1] = static_cast<std::uint8_t>(src[1]);
  }

  out = value;
  return true;
}
bounded_buffer *readFileToFileBuffer(const char *filePath) {
#ifdef _WIN32
HANDLE h = CreateFileA(filePath,

@ -122,6 +122,25 @@ struct parsed_pe_internal {
std::vector<symbol> symbols;
};
// Lookup table from a Rich header (product id, build number) pair
// to a human-readable product name.
static const std::map<ProductKey, const std::string> ProductMap{
    {{1, 0}, "Imported Functions"},
};

// Name reported for pairs that are not listed in ProductMap.
static const std::string kUnknownProduct{"<unknown>"};
// Looks up the product name registered for a Rich header
// (product id, build number) pair. Pairs without an entry in ProductMap
// yield kUnknownProduct. The returned reference refers to static storage.
const std::string& GetRichProductName(std::uint16_t prodId, std::uint16_t buildNum) {
  const auto match = ProductMap.find(std::make_pair(prodId, buildNum));
  if (match == ProductMap.end()) {
    return kUnknownProduct;
  }
  return match->second;
}
std::uint32_t err = 0;
std::string err_loc;
@ -243,6 +262,14 @@ bool getSecForVA(const std::vector<section> &secs, VA v, section &sec) {
return false;
}
// Invokes `cb` once per Rich header entry of `pe`, in stored order, passing
// `cbd` through as the callback's first argument. Iteration stops as soon
// as the callback returns a non-zero value.
void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
  const auto &entries = pe->peHeader.rich.Entries;
  for (auto it = entries.begin(); it != entries.end(); ++it) {
    const int status = cb(cbd, *it);
    if (status != 0) {
      return;
    }
  }
}
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
parsed_pe_internal *pint = pe->internal;
@ -256,19 +283,23 @@ void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
}
bool parse_resource_id(bounded_buffer *data, std::uint32_t id, std::string &result) {
std::uint8_t c;
std::uint16_t len;
if (!readWord(data, id, len)) {
return false;
}
id += 2;
for (std::uint32_t i = 0; i < len * 2U; i++) {
if (!readByte(data, id + i, c)) {
std::uint32_t rawSize = len * 2U;
UCharString rawString;
for (std::uint32_t i = 0; i < rawSize; i += 2) {
char16_t c;
if (!readChar16(data, id + i, c)) {
return false;
}
result.push_back(static_cast<char>(c));
rawString.push_back(c);
}
result = from_utf16(rawString);
return true;
}
@ -794,6 +825,83 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
return true;
}
// Decodes a Rich header from `rich_buf` into `rich_hdr`.
//
// `rich_buf` is expected to start at the (encrypted) "DanS" signature and to
// end with the plain "Rich" signature in its last four bytes; `key` is the
// XOR key used to decrypt every DWORD except the trailing signature.
//
// Returns true when the whole header was decoded. Returns false when the
// buffer is null, a read fails (PEERR_READ), the leading signature is absent,
// or the trailing signature does not match (PEERR_MAGIC).
// `rich_hdr.isPresent` is false unless the leading signature was found;
// entries decoded before a later failure are left in `rich_hdr.Entries`.
bool readRichHeader(bounded_buffer *rich_buf, std::uint32_t key, rich_header &rich_hdr) {
  // Start from a well-defined state so that callers which ignore the return
  // value never observe an unset flag.
  rich_hdr.isPresent = false;

  if (rich_buf == nullptr) {
    return false;
  }

  std::uint32_t encrypted_dword;
  std::uint32_t decrypted_dword;

  // Confirm DanS signature exists first.
  // The first decrypted DWORD value of the rich header
  // at offset 0 should be 0x536e6144 aka the "DanS" signature
  if (!readDword(rich_buf, 0, encrypted_dword)) {
    PE_ERR(PEERR_READ);
    return false;
  }

  decrypted_dword = encrypted_dword ^ key;
  if (decrypted_dword != RICH_MAGIC_START) {
    // DanS magic not found
    return false;
  }

  // DanS magic found
  rich_hdr.isPresent = true;
  rich_hdr.StartSignature = decrypted_dword;

  // Entries occupy the range from offset 16 up to (but excluding) the last
  // 8 bytes of the buffer. Guard the subtraction: for a buffer shorter than
  // 8 bytes the unsigned expression `bufLen - 8` would wrap to a huge value.
  const std::uint32_t buf_len = rich_buf->bufLen;
  const std::uint32_t entries_end = (buf_len >= 8) ? buf_len - 8 : 0;

  // Iterate over the remaining entries.
  // Start from buffer offset 16 because after "DanS" there
  // are three DWORDs of zero padding that can be skipped over.
  // Each entry is two DWORDs (8 bytes), hence the step of 8.
  for (std::uint32_t i = 16; i < entries_end; i += 8) {
    rich_entry entry;

    // Read first DWORD of entry and decrypt it
    if (!readDword(rich_buf, i, encrypted_dword)) {
      PE_ERR(PEERR_READ);
      return false;
    }
    decrypted_dword = encrypted_dword ^ key;
    // The high WORD of the first DWORD is the Product ID
    entry.ProductId = (decrypted_dword & 0xFFFF0000) >> 16;
    // The low WORD of the first DWORD is the Build Number
    entry.BuildNumber = (decrypted_dword & 0xFFFF);

    // The second DWORD represents the use count
    if (!readDword(rich_buf, i + 4, encrypted_dword)) {
      PE_ERR(PEERR_READ);
      return false;
    }
    decrypted_dword = encrypted_dword ^ key;
    // The full 32-bit DWORD is the count
    entry.Count = decrypted_dword;

    // Preserve the individual entry
    rich_hdr.Entries.push_back(entry);
  }

  // Preserve the end signature aka "Rich" magic (stored unencrypted)
  if (!readDword(rich_buf, buf_len - 4, rich_hdr.EndSignature)) {
    PE_ERR(PEERR_READ);
    return false;
  }
  if (rich_hdr.EndSignature != RICH_MAGIC_END) {
    PE_ERR(PEERR_MAGIC);
    return false;
  }

  // Preserve the decryption key
  rich_hdr.DecryptionKey = key;
  return true;
}
bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) {
if (file == nullptr) {
return false;
@ -819,6 +927,53 @@ bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) {
}
curOffset += offset;
// read rich header
std::uint32_t dword;
std::uint32_t rich_end_signature_offset;
std::uint32_t xor_key;
bool found_rich = false;
// Start reading from RICH_OFFSET (0x80), a known Rich header offset.
// Note: 0x80 is based on anecdotal evidence.
//
// Iterate over the DWORDs, hence why i increments 4 bytes at a time.
for (std::uint32_t i = RICH_OFFSET; i < offset; i += 4) {
if (!readDword(file, i, dword)) {
PE_ERR(PEERR_READ);
return false;
}
// Found the trailing Rich signature
if (dword == RICH_MAGIC_END) {
found_rich = true;
rich_end_signature_offset = i;
break;
}
}
if (found_rich) {
// Get the XOR decryption key. It is the DWORD immediately
// after the Rich signature.
if (!readDword(file, rich_end_signature_offset + 4, xor_key)) {
PE_ERR(PEERR_READ);
return false;
}
// Split the Rich header out into its own buffer
bounded_buffer *richBuf = splitBuffer(file, 0x80, rich_end_signature_offset + 4);
if (richBuf == nullptr) {
return false;
}
readRichHeader(richBuf, xor_key, p.rich);
if (richBuf != nullptr) {
deleteBuffer(richBuf);
}
} else {
p.rich.isPresent = false;
}
// now, we can read out the fields of the NT headers
bounded_buffer *ntBuf = splitBuffer(file, curOffset, file->bufLen);

@ -0,0 +1,47 @@
/*
The MIT License (MIT)
Copyright (c) 2019 Trail of Bits, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <parser-library/to_string.h>
#include <locale>
#include <codecvt>
namespace peparse {
// Converts a UTF-16 string to UTF-8.
//
// The conversion uses the std::codecvt_utf8_utf16 facet, which understands
// surrogate pairs; std::codecvt_utf8 only handles UCS-2 and therefore fails
// on any code point outside the Basic Multilingual Plane.
//
// std::wstring_convert::to_bytes throws std::range_error when the input is
// not valid UTF-16 (for example, an unpaired surrogate).
//
// See https://stackoverflow.com/questions/38688417/utf-conversion-functions-in-c11
std::string from_utf16(const UCharString &u)
{
#if defined(_MSC_VER)
  // std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; // Doesn't compile with Visual Studio.
  // See https://stackoverflow.com/questions/32055357/visual-studio-c-2015-stdcodecvt-with-char16-t-or-char32-t
  // Workaround: instantiate the facet for a 16-bit integer type instead and
  // reinterpret the code units.
  std::wstring_convert<std::codecvt_utf8_utf16<std::int16_t>, std::int16_t> convert;
  auto p = reinterpret_cast<const std::int16_t *>(u.data());
  return convert.to_bytes(p, p + u.size());
#else
  // -std=c++11 or -std=c++14
  // Requires GCC 5 or higher
  // Requires Clang ??? or higher (tested on Clang 3.8, 5.0, 6.0)
  std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
  return convert.to_bytes(u);
#endif
}
} // namespace peparse

@ -0,0 +1,36 @@
/*
The MIT License (MIT)
Copyright (c) 2019 Trail of Bits, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <parser-library/to_string.h>
#include <unicode/unistr.h>
namespace peparse {
std::string from_utf16(const UCharString &u)
{
icu::UnicodeString utf16_string = icu::UnicodeString(u.data(), u.length());
std::string result;
utf16_string.toUTF8String(result);
return result;
}
} // namespace peparse

@ -885,15 +885,6 @@ int resource_callback(void *cbd, resource r) {
* The tuple item order is important here. It is passed into the
* section type initialization and parsed there.
*/
#if PY_MAJOR_VERSION >= 3
tuple = Py_BuildValue("u#u#u#IIIIIIO&",
r.type_str.c_str(),
r.type_str.length() / 2,
r.name_str.c_str(),
r.name_str.length() / 2,
r.lang_str.c_str(),
r.lang_str.length() / 2,
#else
tuple = Py_BuildValue("s#s#s#IIIIIIO&",
r.type_str.c_str(),
r.type_str.length(),
@ -901,7 +892,6 @@ int resource_callback(void *cbd, resource r) {
r.name_str.length(),
r.lang_str.c_str(),
r.lang_str.length(),
#endif
r.type,
r.name,
r.lang,

@ -57,7 +57,7 @@ linux_initialize() {
fi
printf " > Installing the required packages...\n"
sudo apt-get install -qqy cmake python2.7 python-dev build-essential realpath > "${log_file}" 2>&1
sudo apt-get install -qqy cmake python2.7 python-dev build-essential realpath libicu-dev > "${log_file}" 2>&1
if [ $? -ne 0 ] ; then
printf " x Could not install the required dependencies\n\n\n"
cat "${log_file}"