Merge remote-tracking branch 'main/master' into HEAD

This commit is contained in:
Andrei Yankovich 2021-02-08 14:17:51 +03:00
commit e0dfc827b3
35 changed files with 1429 additions and 823 deletions

View File

@ -10,10 +10,29 @@ on:
- cron: '0 12 * * *'
jobs:
test:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: deps
run: |
sudo apt-get update
sudo apt-get install -y clang-format-9
- name: lint
run: |
mkdir build && cd build
cmake ..
cmake --build . --target peparse_format
cd .. && git diff --exit-code
pe-parse:
strategy:
matrix:
platform: ["ubuntu-latest", "macos-latest"]
build-type: ["Debug", "Release"]
build-shared: ["0", "1"]
compiler:
- { CC: "clang", CXX: "clang++" }
- { CC: "gcc", CXX: "g++" }
@ -23,27 +42,93 @@ jobs:
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v2
- name: Build C
- name: build
env:
CC: ${{ matrix.compiler.CC }}
CXX: ${{ matrix.compiler.CXX }}
run: |
mkdir build
cd build
cmake ..
make
- name: Build Python
cmake \
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \
..
cmake --build .
- name: test
run: |
./build/dump-pe/dump-pe ./test/assets/example.exe
pepy:
strategy:
matrix:
platform: ["ubuntu-latest", "macos-latest"]
python:
- "3.6"
- "3.7"
- "3.8"
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python }}
- name: build
run: |
cd python
python2 setup.py build
python3 setup.py build
test-windows:
- name: sdist and install
run: |
python3 setup.py sdist
python3 -m pip install --user dist/*.tar.gz
- name: test
run: |
python3 test/test_pepy.py test/assets/example.exe
pe-parse-windows:
strategy:
matrix:
build-arch: ["x64", "Win32"]
build-type: ["Debug", "Release"]
build-shared: ["0", "1"]
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- name: Build C
- name: build
run: |
mkdir build
cd build
cmake -G "Visual Studio 16 2019" -A x64 ..
cmake --build .
cmake `
-G "Visual Studio 16 2019" `
-A ${{ matrix.build-arch }} `
-DBUILD_SHARED_LIBS=${{ matrix.build-shared }} `
..
cmake --build . --config ${{ matrix.build-type }}
- name: install
run: |
cd build
cmake --build . --target install
- name: test
run: |
.\build\bin\dump-pe.exe .\test\assets\example.exe
pepy-windows:
strategy:
matrix:
python:
- "3.6"
- "3.7"
- "3.8"
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python }}
- name: build
run: |
python setup.py build
- name: install
run: |
python -m pip install --user .
- name: test
run: |
python test/test_pepy.py test/assets/example.exe

34
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,34 @@
# Release workflow: for every pushed tag matching `v*`, create a GitHub
# release for the tag and upload a source distribution to PyPI.
on:
  push:
    tags:
      - 'v*'

name: release

jobs:
  pypi:
    name: upload release to PyPI
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - uses: actions/setup-python@v1
        with:
          # Quoted so the version stays a string instead of a YAML float.
          python-version: '3.8'

      - name: create release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          draft: false
          # Tags containing "pre" or "rc" are published as pre-releases.
          prerelease: ${{ contains(github.ref, 'pre') || contains(github.ref, 'rc') }}

      - name: sdist
        run: python3 setup.py sdist

      - name: publish
        # Pinned to the action's release branch rather than the moving
        # `master` ref, so a publish is not affected by unreleased changes.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}

10
.gitignore vendored
View File

@ -1,8 +1,8 @@
*Makefile*
Makefile
cmake_install.cmake
dump-prog/dump-prog
*.swp
python/build
build/
.idea
cmake-build-debug
cmake-build-release
@ -11,8 +11,10 @@ CMakeSettings.json
.vs
.vscode
examples_build
.DS_Store
dist/
MANIFEST
*.egg-info/
*.stash
*.o

View File

@ -1,63 +0,0 @@
__build_stage_script: &__build_stage_script
stage: build
language: cpp
script:
- mkdir build && cd build
- cmake ..
- make
- cd ../python
- python2 setup.py build
- python3 setup.py build
addons:
apt:
packages:
- cmake
- python2.7
- python-dev
- build-essential
- realpath
- libicu-dev
homebrew:
packages:
- cmake
coverity_scan:
project:
name: "trailofbits/pe-parse"
description: "Principled, lightweight C/C++ PE parser"
notification_email: dan@trailofbits.com
build_command_prepend: mkdir cov_build && cd cov_build && cmake ..
build_command: make
branch_pattern: master
jobs:
include:
- stage: lint
language: minimal
dist: bionic
addons:
apt:
packages:
- clang-format-8
script:
- find . \( -name '*.h' \) -or \( -name '*.cpp' \) | xargs clang-format -i -style=file
- git diff --exit-code
- <<: *__build_stage_script
os: linux
compiler: clang
- <<: *__build_stage_script
os: linux
compiler: gcc
- <<: *__build_stage_script
os: osx
compiler: clang
- stage: build
os: windows
language: cpp
script:
- mkdir build && cd build
- cmake -G "Visual Studio 15 2017 Win64" ..
- cmake --build .
env:
global:
- secure: "O+BGqz4ugoVIJbQTh0dJjKRrsSVzkCYSe0WpRzEWK3l8Mw7hqX300g81TxRwTzN2zfUsROMzaeGaXWfGzYakgW59K1WIioaczxtv2MzzUQTbqzJPa+qQoP9bk/b2wJ5jcOL965/rudRju4UiIwuIgzDAMN3nAfIEJgV/2zANLIg="

View File

@ -1,11 +1,13 @@
cmake_minimum_required(VERSION 3.7)
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(pe-parse)
# NOTE(ww): CMake has bad defaults for install prefixes.
# Instead of fussing over them, install everything to the build directory by default
# and let the user set CMAKE_INSTALL_PREFIX explicitly for their own needs.
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "Default install directory" FORCE)
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}" CACHE PATH "Default install directory" FORCE)
endif ()
set(CMAKE_VERBOSE_MAKEFILE True)
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
endif ()
@ -13,19 +15,45 @@ endif ()
include(cmake/compilation_flags.cmake)
list(APPEND GLOBAL_CXXFLAGS ${DEFAULT_CXX_FLAGS})
option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)
option(BUILD_SHARED_LIBS "Build Shared Libraries" ON)
option(BUILD_COMMAND_LINE_TOOLS "Build Command Line Tools" ON)
option(PEPARSE_LIBRARY_WARNINGS "Log pe-parse library warnings to stderr" OFF)
if (MSVC)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif ()
file(READ "${PROJECT_SOURCE_DIR}/VERSION" PEPARSE_VERSION)
string(STRIP "${PEPARSE_VERSION}" PEPARSE_VERSION)
add_compile_definitions(PEPARSE_VERSION="${PEPARSE_VERSION}")
add_subdirectory(pe-parser-library)
if (BUILD_COMMAND_LINE_TOOLS)
add_subdirectory(dump-pe)
endif ()
# `peparse_format` target.
file(
GLOB_RECURSE
PEPARSE_ALL_SOURCES
pe-parser-library/*.cpp
pe-parser-library/*.h
pepy/*.cpp
pepy/*.h
dump-pe/*.cpp
examples/*.cpp
examples/*.h
)
add_custom_target(
peparse_format
COMMAND clang-format -i -style=file ${PEPARSE_ALL_SOURCES}
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
COMMENT "Auto-format the codebase with clang-format"
VERBATIM
)
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Build Shared: ${BUILD_SHARED_LIBS} ${BUILD_SHARED_LIBS_MESSAGE}")
message(STATUS "Build Command Line Tools: ${BUILD_COMMAND_LINE_TOOLS}")

View File

@ -7,15 +7,16 @@ LABEL creator "Trail of Bits"
LABEL dockerfile_maintenance "William Woodruff <william@trailofbits>"
LABEL desc "Principled, lightweight C/C++ PE parser"
RUN apk add --no-cache cmake icu-dev build-base
RUN apk add --no-cache cmake icu-dev clang build-base
COPY . /app/pe-parse
WORKDIR /app/pe-parse
ENV CC=clang CXX=clang++
RUN mkdir build && \
cd build && \
cmake -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" .. && \
cmake --build . --config "${BUILD_TYPE}" && \
cmake --build . --config "${BUILD_TYPE}" --target install
cmake --build . && \
cmake --build . --target install
ENTRYPOINT [ "/usr/bin/dump-pe" ]
CMD ["--help"]

3
MANIFEST.in Normal file
View File

@ -0,0 +1,3 @@
include VERSION
include pepy/README.md
include pe-parser-library/include/pe-parse/*.h

View File

@ -1,10 +1,15 @@
pe-parse
=========================================
========
[![Build Status](https://img.shields.io/github/workflow/status/trailofbits/pe-parse/CI/master)](https://github.com/trailofbits/pe-parse/actions?query=workflow%3ACI)
[![Coverity Scan Build Status](https://scan.coverity.com/projects/3671/badge.svg)](https://scan.coverity.com/projects/3671)
[![LGTM Total alerts](https://img.shields.io/lgtm/alerts/g/trailofbits/pe-parse.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/trailofbits/pe-parse/alerts/)
pe-parse is a principled, lightweight parser for windows portable executable files. It was created to assist in compiled program analysis, potentially of programs of unknown origins. This means that it should be resistant to malformed or maliciously crafted PE files, and it should support questions that analysis software would ask of an executable program container. For example, listing relocations, describing imports and exports, and supporting byte reads from virtual addresses as well as file offsets.
pe-parse is a principled, lightweight parser for Windows portable executable files.
It was created to assist in compiled program analysis, potentially of programs of unknown origins.
This means that it should be resistant to malformed or maliciously crafted PE files, and it should
support questions that analysis software would ask of an executable program container.
For example, listing relocations, describing imports and exports, and supporting byte reads from
virtual addresses as well as file offsets.
pe-parse supports these use cases via a minimal API that provides methods for
* Opening and closing a PE file
@ -16,21 +21,44 @@ pe-parse supports these use cases via a minimal API that provides methods for
* Reading bytes from specified virtual addresses
* Retrieving the program entry point
The interface is defined in `parser-library/parse.h`. The program in `dump-prog/dump.cpp` is an example of using the parser-library API to dump information about a PE file.
The interface is defined in `pe-parser-library/include/pe-parse/parse.h`.
Internally, the parser-library uses a bounded buffer abstraction to access information stored in the PE file. This should help in constructing a sane parser that allows for detection of the use of bogus values in the PE that would result in out of bounds accesses of the input buffer. Once data is read from the file it is sanitized and placed in C++ STL containers of internal types.
The program in `dump-pe/main.cpp` is an example of using the parser-library API to dump
information about a PE file.
Internally, the parser-library uses a bounded buffer abstraction to access information stored in
the PE file. This should help in constructing a sane parser that allows for detection of the use
of bogus values in the PE that would result in out of bounds accesses of the input buffer.
Once data is read from the file it is sanitized and placed in C++ STL containers of internal types.
## Installation
pe-parse can be installed via [vcpkg](https://github.com/microsoft/vcpkg):
```bash
$ vcpkg install pe-parse
```
pe-parse includes Python bindings via `pepy`, which can be installed via `pip`:
```bash
$ pip3 install pepy
```
More information about `pepy` can be found in its [README](./pepy/README.md).
## Dependencies
Dependencies
========
### CMake
* Debian/Ubuntu: `sudo apt-get install cmake`
* RedHat/Fedora: `sudo yum install cmake`
* OSX: `brew install cmake`
* Windows: Download the installer from the [CMake page](https://cmake.org/download/)
Building
========
## Building
### Generic instructions
```
git clone https://github.com/trailofbits/pe-parse.git
cd pe-parse
@ -39,48 +67,40 @@ mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
cmake --build . --config Release
cmake --build .
# optional
cmake --build . --config Release --target install
cmake --build . --target install
```
In PE files that have a Resource section, the strings for the Type are encoded in UTF-16, but `std::string` expects UTF-8. Some cross-platform solution
is desired. You can let cmake choose one it finds in your build environment or you can choose one from the following options yourself and specify it with
the `-DUNICODE_LIBRARY` argument when generating the project files with cmake:
* `icu` (preferred) - "[ICU](http://site.icu-project.org/) is a mature, widely used set of C/C++ and Java libraries providing Unicode and Globalization support for software applications"
* `codecvt` - A C++ library header file ([now deprecated](http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0618r0.html)) supported by some C++ runtimes
### Windows-specific
### Notes about Windows
If you are building on Windows with Visual Studio, the generator option can be used to select the compiler version and the output architecture:
VS 2017 and VS 2019 are supported.
```
# Compile 64-bit binaries with Visual Studio 2017
cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release ..
cmake -G "Visual Studio 15 2017 Win64" ..
# Compile 32-bit binaries with Visual Studio 2017
cmake -G "Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=Release ..
# Or, with VS 2019, use the -A flag for architecture
cmake -G "Visual Studio 16 2019" -A x64 ..
# Pass the build type at build time
cmake --build . --config Release
```
Visual Studio 2015 or higher is required to use codecvt, but you also have the option of using [ICU](http://site.icu-project.org/). The easiest way to
get started with ICU in Windows is with [vcpkg](https://vcpkg.readthedocs.io/): `vcpkg install icu`. Then add the
`-DCMAKE_TOOLCHAIN_FILE=C:\src\vcpkg\scripts\buildsystems\vcpkg.cmake` argument when generating the project files with cmake to add the appropriate
library and include directories to the project.
## Using the library
Using the library
=======
Once the library is installed, linking to it is easy! Add the following lines in your CMake project:
```
find_package(peparse REQUIRED)
find_package(pe-parse REQUIRED)
target_link_libraries(your_target_name ${PEPARSE_LIBRARIES})
target_include_directories(your_target_name PRIVATE ${PEPARSE_INCLUDE_DIRS})
target_link_libraries(your_target_name PRIVATE pe-parse::pe-parse)
```
You can see a full example in the examples/peaddrconv folder.
You can see a full example in the [examples/peaddrconv](examples/peaddrconv) folder.
Authors
=======
pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant contributions from [Wesley Shields](https://github.com/wxsBSD).
## Authors
pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant
contributions from [Wesley Shields](https://github.com/wxsBSD).

1
VERSION Normal file
View File

@ -0,0 +1 @@
1.2.0

View File

@ -32,10 +32,8 @@ else ()
endif ()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
message(STATUS "This is a debug build; enabling -Weverything...")
list(APPEND DEFAULT_CXX_FLAGS
-Weverything -Wno-c++98-compat -Wno-missing-prototypes
-Wno-c++98-compat -Wno-missing-prototypes
-Wno-missing-variable-declarations -Wno-global-constructors
-Wno-exit-time-destructors -Wno-padded -Wno-error
)

View File

@ -1,8 +1,8 @@
cmake_minimum_required(VERSION 3.7)
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(dump-pe)
add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME} PRIVATE pe-parser-library)
target_link_libraries(${PROJECT_NAME} PRIVATE pe-parse)
target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
install(TARGETS ${PROJECT_NAME} DESTINATION "bin")
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION "bin")

View File

@ -27,11 +27,16 @@ THE SOFTWARE.
#include <iostream>
#include <sstream>
#include <parser-library/parse.h>
#include <pe-parse/parse.h>
#include "vendor/argh.h"
using namespace peparse;
int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) {
int printExps(void *N,
const VA &funcAddr,
const std::string &mod,
const std::string &func) {
static_cast<void>(N);
auto address = static_cast<std::uint32_t>(funcAddr);
@ -47,7 +52,7 @@ int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) {
}
int printImports(void *N,
VA impAddr,
const VA &impAddr,
const std::string &modName,
const std::string &symName) {
static_cast<void>(N);
@ -59,7 +64,7 @@ int printImports(void *N,
return 0;
}
int printRelocs(void *N, VA relocAddr, reloc_type type) {
int printRelocs(void *N, const VA &relocAddr, const reloc_type &type) {
static_cast<void>(N);
std::cout << "TYPE: ";
@ -99,12 +104,12 @@ int printRelocs(void *N, VA relocAddr, reloc_type type) {
}
int printSymbols(void *N,
std::string &strName,
uint32_t &value,
int16_t &sectionNumber,
uint16_t &type,
uint8_t &storageClass,
uint8_t &numberOfAuxSymbols) {
const std::string &strName,
const uint32_t &value,
const int16_t &sectionNumber,
const uint16_t &type,
const uint8_t &storageClass,
const uint8_t &numberOfAuxSymbols) {
static_cast<void>(N);
std::cout << "Symbol Name: " << strName << "\n";
@ -227,7 +232,7 @@ int printSymbols(void *N,
return 0;
}
int printRich(void *N, rich_entry r) {
int printRich(void *N, const rich_entry &r) {
static_cast<void>(N);
std::cout << std::dec;
std::cout << std::setw(10) << "ProdId:" << std::setw(7) << r.ProductId;
@ -239,7 +244,7 @@ int printRich(void *N, rich_entry r) {
return 0;
}
int printRsrc(void *N, resource r) {
int printRsrc(void *N, const resource &r) {
static_cast<void>(N);
if (r.type_str.length())
@ -264,10 +269,10 @@ int printRsrc(void *N, resource r) {
}
int printSecs(void *N,
VA secBase,
std::string &secName,
image_section_header s,
bounded_buffer *data) {
const VA &secBase,
const std::string &secName,
const image_section_header &s,
const bounded_buffer *data) {
static_cast<void>(N);
static_cast<void>(s);
@ -292,14 +297,21 @@ int printSecs(void *N,
std::cout << std::boolalpha << static_cast<bool>(p->peHeader.x) << "\n";
int main(int argc, char *argv[]) {
if (argc != 2 || (argc == 2 && std::strcmp(argv[1], "--help") == 0)) {
argh::parser cmdl(argv);
if (cmdl[{"-h", "--help"}] || argc <= 1) {
std::cout << "dump-pe utility from Trail of Bits\n";
std::cout << "Repository: https://github.com/trailofbits/pe-parse\n\n";
std::cout << "Usage:\n\tdump-pe /path/to/executable.exe\n";
return 1;
return 0;
} else if (cmdl[{"-v", "--version"}]) {
std::cout << "dump-pe (pe-parse) version " << PEPARSE_VERSION << "\n";
return 0;
}
parsed_pe *p = ParsePEFromFile(argv[1]);
parsed_pe *p = ParsePEFromFile(cmdl[1].c_str());
if (p == nullptr) {
std::cout << "Error: " << GetPEErr() << " (" << GetPEErrString() << ")"
<< "\n";

463
dump-pe/vendor/argh.h vendored Normal file
View File

@ -0,0 +1,463 @@
/*
* Copyright (c) 2016, Adi Shavit
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <algorithm>
#include <sstream>
#include <limits>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <cassert>
namespace argh
{
// Terminology:
// A command line is composed of 2 types of args:
// 1. Positional args, i.e. free standing values
// 2. Options: args beginning with '-'. We identify two kinds:
// 2.1: Flags: boolean options => (exist ? true : false)
// 2.2: Parameters: a name followed by a non-option value
#if !defined(__GNUC__) || (__GNUC__ >= 5)
using string_stream = std::istringstream;
#else
// Until GCC 5, istringstream did not have a move constructor.
// stringstream_proxy is used instead, as a workaround.
// Copyable wrapper around std::istringstream, used as `string_stream` on
// compilers whose istringstream cannot be moved (see the #if above).
class stringstream_proxy
{
public:
   stringstream_proxy() = default;

   // Wraps a stream that reads from `value`.
   stringstream_proxy(std::string const& value)
      : stream_(value)
   {
   }

   // Copies the buffered text and the state flags of `other`.
   stringstream_proxy(const stringstream_proxy& other)
      : stream_(other.stream_.str())
   {
      stream_.setstate(other.stream_.rdstate());
   }

   ~stringstream_proxy() = default;

   // Forwards the state flags to the wrapped stream.
   void setstate(std::ios_base::iostate state)
   {
      stream_.setstate(state);
   }

   // Extracts a value of type T. When the conversion is not possible the
   // wrapped stream enters the fail state and operator bool yields false.
   template<typename T>
   stringstream_proxy& operator >> (T& thing)
   {
      stream_ >> thing;
      return *this;
   }

   // The full text held by the wrapped stream.
   std::string str() const
   {
      return stream_.str();
   }

   // The buffer of the wrapped stream.
   std::stringbuf* rdbuf() const
   {
      return stream_.rdbuf();
   }

   // True unless the most recent stream operation failed.
   explicit operator bool() const
   {
      return !stream_.fail();
   }

private:
   std::istringstream stream_;
};
using string_stream = stringstream_proxy;
#endif
// Command-line parser. Splits an argv array into positional args, flags and
// name/value parameters and offers accessors for each of them.
class parser
{
public:
   // Bit flags that tune parse(); combine them with bitwise OR.
   enum Mode
   {
      PREFER_FLAG_FOR_UNREG_OPTION  = 1 << 0,
      PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1,
      NO_SPLIT_ON_EQUALSIGN         = 1 << 2,
      SINGLE_DASH_IS_MULTIFLAG      = 1 << 3,
   };

   parser() = default;

   // Registers `pre_reg_names` as parameter names; nothing is parsed yet.
   parser(std::initializer_list<const char* const> pre_reg_names)
   {
      add_params(pre_reg_names);
   }

   // Parses a null-terminated argv array right away.
   parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
   {
      parse(argv, mode);
   }

   // Parses the first `argc` entries of argv right away.
   parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
   {
      parse(argc, argv, mode);
   }

   // Registration of option names that take a value.
   void add_param(const std::string& name);
   void add_params(std::initializer_list<const char* const> init_list);

   // Parsing entry points: null-terminated argv, or argv with an explicit count.
   void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
   void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);

   // Read-only views of the parse results.
   const std::multiset<std::string>&         flags()    const { return flags_; }
   const std::map<std::string, std::string>& params()   const { return params_; }
   const std::vector<std::string>&           pos_args() const { return pos_args_; }

   // Iteration support, so a range-for visits the positional args.
   std::vector<std::string>::const_iterator begin() const { return pos_args_.begin(); }
   std::vector<std::string>::const_iterator end()   const { return pos_args_.end(); }
   size_t size() const { return pos_args_.size(); }

   //////////////////////////////////////////////////////////////////////////
   // Accessors

   // Flag lookup: true when the flag appeared on the command line.
   bool operator[](const std::string& name) const;

   // Flag lookup over several spellings: true when at least one appeared.
   bool operator[](std::initializer_list<const char* const> init_list) const;

   // Positional arg by index; like argv[] with the options removed.
   const std::string& operator[](size_t ind) const;

   // Positional arg by index as a stream, for conversion to a typed value.
   string_stream operator()(size_t ind) const;

   // As above; `def_val` is used when the index is out of range.
   template<typename T>
   string_stream operator()(size_t ind, T&& def_val) const;

   // Parameter by name as a stream, for conversion to a typed value.
   // Call .str() on the result to obtain the raw string.
   string_stream operator()(const std::string& name) const;

   // Parameter known under several names; the first name in the list that
   // has a value wins. Call .str() on the result to obtain the raw string.
   string_stream operator()(std::initializer_list<const char* const> init_list) const;

   // Parameter by name; `def_val` is used when the parameter is missing.
   // A non-string def_val type needs an operator<<() (output stream operator).
   // When T only has an input stream operator, pass the value in string form,
   // i.e. "3" instead of 3.
   template<typename T>
   string_stream operator()(const std::string& name, T&& def_val) const;

   // As above for a list of names; the first value found is returned.
   template<typename T>
   string_stream operator()(std::initializer_list<const char* const> init_list, T&& def_val) const;

private:
   // Internal helpers used by the parsing and accessor functions.
   string_stream bad_stream() const;
   std::string trim_leading_dashes(const std::string& name) const;
   bool is_number(const std::string& arg) const;
   bool is_option(const std::string& arg) const;
   bool got_flag(const std::string& name) const;
   bool is_param(const std::string& name) const;

private:
   std::vector<std::string> args_;              // every argument, as given
   std::map<std::string, std::string> params_;  // option name -> value
   std::vector<std::string> pos_args_;          // non-option arguments, in order
   std::multiset<std::string> flags_;           // options seen without a value
   std::set<std::string> registeredParams_;     // names added via add_param(s)
   std::string empty_;                          // returned for a missing positional arg
};
//////////////////////////////////////////////////////////////////////////
// Counts the entries of a null-terminated argv array and forwards to the
// (argc, argv, mode) overload.
inline void parser::parse(const char * const argv[], int mode)
{
   int count = 0;
   while (argv[count] != nullptr)
   {
      ++count;
   }
   parse(count, argv, mode);
}
//////////////////////////////////////////////////////////////////////////
// Parses the first `argc` entries of `argv` into positional args, flags and
// name/value parameters.
//
// `mode` is a bitwise OR of parser::Mode values. The results of any earlier
// parse() call are discarded first; names registered through add_param() /
// add_params() are kept.
inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/)
{
   // Start from a clean slate so that parsing again with the same object does
   // not mix in the positional args, flags or parameters of a previous run.
   flags_.clear();
   params_.clear();
   pos_args_.clear();

   // convert to strings
   args_.resize(static_cast<decltype(args_)::size_type>(argc));
   std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg; });

   // parse line
   for (auto i = 0u; i < args_.size(); ++i)
   {
      // Anything that is not an option is a positional arg.
      if (!is_option(args_[i]))
      {
         pos_args_.emplace_back(args_[i]);
         continue;
      }

      auto name = trim_leading_dashes(args_[i]);

      // "--name=value" form: split at the first '=' unless disabled by mode.
      if (!(mode & NO_SPLIT_ON_EQUALSIGN))
      {
         auto equalPos = name.find('=');
         if (equalPos != std::string::npos)
         {
            params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) });
            continue;
         }
      }

      // if the option is unregistered and should be a multi-flag
      if (1 == (args_[i].size() - name.size()) &&         // single dash
         argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode
         !is_param(name))                                 // unregistered
      {
         std::string keep_param;

         if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param
         {
            keep_param += name.back();
            name.resize(name.size() - 1);
         }

         // Every remaining character is an individual flag.
         for (auto const& c : name)
         {
            flags_.emplace(std::string{ c });
         }

         if (!keep_param.empty())
         {
            // The trailing registered parameter is handled by the code below.
            name = keep_param;
         }
         else
         {
            continue; // do not consider other options for this arg
         }
      }

      // any potential option will get as its value the next arg, unless that arg is an option too
      // in that case it will be determined a flag.
      if (i == args_.size() - 1 || is_option(args_[i + 1]))
      {
         flags_.emplace(name);
         continue;
      }

      // if 'name' is a pre-registered option, then the next arg cannot be a free parameter, so it is skipped.
      // otherwise we have 2 modes:
      // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag.
      //                               The following value (the next arg) will be a free parameter.
      //
      // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg
      //                                will be the value of that option.

      // The two preference modes are mutually exclusive.
      assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION)
         || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION));

      bool preferParam = (mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION) != 0;

      if (is_param(name) || preferParam)
      {
         params_.insert({ name, args_[i + 1] });
         ++i; // skip next value, it is not a free parameter
         continue;
      }
      else
      {
         flags_.emplace(name);
      }
   }
}
//////////////////////////////////////////////////////////////////////////
// Builds an empty stream that is already in the fail state; the accessors
// return it when the requested value does not exist.
inline string_stream parser::bad_stream() const
{
   string_stream failed;
   failed.setstate(std::ios_base::failbit);
   return failed;
}
//////////////////////////////////////////////////////////////////////////
// Reports whether a double can be extracted from the start of `arg`.
// Simple rather than fast; its purpose is to let values that begin with '-'
// (negative numbers) be told apart from options.
inline bool parser::is_number(std::string const& arg) const
{
   double parsed;
   std::istringstream reader(arg);
   reader >> parsed;
   // fail() is true for both failbit and badbit.
   return !reader.fail();
}
//////////////////////////////////////////////////////////////////////////
// Reports whether `arg` is an option, i.e. starts with '-' and is not a
// number such as "-1.5".
//
// An empty argument is never an option. parse() forwards every argv entry
// here unfiltered, so an empty string (e.g. `prog ""`) is a legal input and
// must not abort the program; it is classified as a positional arg.
inline bool parser::is_option(std::string const& arg) const
{
   if (arg.empty())
      return false;

   if (is_number(arg))
      return false;

   return '-' == arg[0];
}
//////////////////////////////////////////////////////////////////////////
// Returns `name` without its leading '-' characters. A name that is empty or
// consists of dashes only is returned unchanged.
inline std::string parser::trim_leading_dashes(std::string const& name) const
{
   auto const first = name.find_first_not_of('-');
   if (first == std::string::npos)
      return name;
   return name.substr(first);
}
//////////////////////////////////////////////////////////////////////////
// True when `name` (leading dashes ignored) was recorded as a flag.
inline bool argh::parser::got_flag(std::string const& name) const
{
   auto const key = trim_leading_dashes(name);
   return flags_.count(key) != 0;
}
//////////////////////////////////////////////////////////////////////////
// True when `name` is one of the registered parameter names. The name is
// looked up as given; dashes are not trimmed here.
inline bool argh::parser::is_param(std::string const& name) const
{
   return registeredParams_.find(name) != registeredParams_.end();
}
//////////////////////////////////////////////////////////////////////////
// Flag accessor: true when the flag `name` appeared on the command line.
inline bool parser::operator[](std::string const& name) const
{
   bool const present = got_flag(name);
   return present;
}
//////////////////////////////////////////////////////////////////////////
// Flag accessor for several spellings: true as soon as one of the names in
// `init_list` is found among the flags.
inline bool parser::operator[](std::initializer_list<char const* const> init_list) const
{
   for (char const* const candidate : init_list)
   {
      if (got_flag(candidate))
         return true;
   }
   return false;
}
//////////////////////////////////////////////////////////////////////////
// Positional arg accessor: the arg at index `ind`, or a reference to an
// empty string when `ind` is out of range.
inline std::string const& parser::operator[](size_t ind) const
{
   if (ind >= pos_args_.size())
      return empty_;
   return pos_args_[ind];
}
//////////////////////////////////////////////////////////////////////////
// Parameter accessor: a stream over the value stored for `name` (leading
// dashes ignored), or a failed stream when there is no such parameter.
inline string_stream parser::operator()(std::string const& name) const
{
   auto const found = params_.find(trim_leading_dashes(name));
   if (found == params_.end())
      return bad_stream();
   return string_stream(found->second);
}
//////////////////////////////////////////////////////////////////////////
// Parameter accessor for several names: tries the names in list order and
// returns a stream over the first value found, or a failed stream when none
// of them has a value.
inline string_stream parser::operator()(std::initializer_list<char const* const> init_list) const
{
   for (auto& candidate : init_list)
   {
      auto const found = params_.find(trim_leading_dashes(candidate));
      if (found == params_.end())
         continue;
      return string_stream(found->second);
   }
   return bad_stream();
}
//////////////////////////////////////////////////////////////////////////
// Parameter accessor with a fallback: a stream over the value stored for
// `name`, or over `def_val` formatted with operator<< when the parameter is
// missing.
template<typename T>
string_stream parser::operator()(std::string const& name, T&& def_val) const
{
   auto const found = params_.find(trim_leading_dashes(name));
   if (found == params_.end())
   {
      // Not given: format the default with the max_digits10 precision of
      // long double.
      std::ostringstream fallback;
      fallback.precision(std::numeric_limits<long double>::max_digits10);
      fallback << def_val;
      return string_stream(fallback.str()); // use default
   }
   return string_stream(found->second);
}
//////////////////////////////////////////////////////////////////////////
// Parameter accessor with a fallback for several names: a stream over the
// first value found when trying the names in list order, or over `def_val`
// formatted with operator<< when none of them has a value.
template<typename T>
string_stream parser::operator()(std::initializer_list<char const* const> init_list, T&& def_val) const
{
   for (auto& candidate : init_list)
   {
      auto const found = params_.find(trim_leading_dashes(candidate));
      if (found == params_.end())
         continue;
      return string_stream(found->second);
   }

   std::ostringstream fallback;
   fallback.precision(std::numeric_limits<long double>::max_digits10);
   fallback << def_val;
   return string_stream(fallback.str()); // use default
}
//////////////////////////////////////////////////////////////////////////
// Positional arg accessor as a stream: a stream over the arg at index `ind`,
// or a failed stream when `ind` is out of range.
inline string_stream parser::operator()(size_t ind) const
{
   if (ind < pos_args_.size())
      return string_stream(pos_args_[ind]);
   return bad_stream();
}
//////////////////////////////////////////////////////////////////////////
// Positional arg accessor with a fallback: a stream over the arg at index
// `ind`, or over `def_val` formatted with operator<< when `ind` is out of
// range.
template<typename T>
string_stream parser::operator()(size_t ind, T&& def_val) const
{
   if (ind < pos_args_.size())
      return string_stream(pos_args_[ind]);

   std::ostringstream fallback;
   fallback.precision(std::numeric_limits<long double>::max_digits10);
   fallback << def_val;
   return string_stream(fallback.str());
}
//////////////////////////////////////////////////////////////////////////
// Register a single parameter name.
// The name is stored in registeredParams_ with its leading dashes removed.
inline void parser::add_param(std::string const& name)
{
   auto const& key = trim_leading_dashes(name);
   registeredParams_.insert(key);
}
//////////////////////////////////////////////////////////////////////////
// Register several parameter names at once.
// Each name is stored in registeredParams_ with its leading dashes removed.
inline void parser::add_params(std::initializer_list<char const* const> init_list)
{
   auto cursor = init_list.begin();
   while (cursor != init_list.end())
   {
      registeredParams_.insert(trim_leading_dashes(*cursor));
      ++cursor;
   }
}
}

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(peaddrconv)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
@ -35,11 +35,10 @@ else ()
endif ()
endif ()
find_package(peparse REQUIRED)
find_package(pe-parse REQUIRED)
add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME} ${PEPARSE_LIBRARIES})
target_include_directories(${PROJECT_NAME} PRIVATE ${PEPARSE_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} pe-parse::pe-parse)
target_compile_options(${PROJECT_NAME} PRIVATE ${PEADDRCONV_CXXFLAGS})
install(TARGETS ${PROJECT_NAME} DESTINATION "bin")

View File

@ -6,7 +6,7 @@
#include <climits>
#include <cstring>
#include <parser-library/parse.h>
#include <pe-parse/parse.h>
using ParsedPeRef =
std::unique_ptr<peparse::parsed_pe, void (*)(peparse::parsed_pe *)>;
@ -243,7 +243,9 @@ bool convertAddress(ParsedPeRef &pe,
result);
}
default: { return false; }
default: {
return false;
}
}
}

View File

@ -1,81 +1,63 @@
cmake_minimum_required(VERSION 3.7)
project(pe-parser-library)
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(pe-parse)
set(UNICODE_LIBRARY "any" CACHE STRING "Select a unicode library")
set_property(CACHE UNICODE_LIBRARY PROPERTY STRINGS "any" "icu" "codecvt")
# This variable is used twice so setting once at the top here to prevent
# the chance they get out of sync.
# This is the minimum "required" version but there's a good chance early
# versions of ICU support the simple functionality needed by this project.
set(ICU_MINIMUM_REQUIRED 55.0)
message(STATUS "VERSION file: ${PROJECT_SOURCE_DIR}/../VERSION")
# List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...)
list(APPEND PEPARSERLIB_SOURCEFILES
include/parser-library/parse.h
include/parser-library/nt-headers.h
include/parser-library/to_string.h
include/pe-parse/parse.h
include/pe-parse/nt-headers.h
include/pe-parse/to_string.h
src/buffer.cpp
src/parse.cpp
)
# Check for codecvt support. Likely the proper way to do this would be to
# use CMake system inspection via methods like "try_compile" to determine
# if the "#include <codecvt>" directive compiles successfully.
if (MSVC)
if (MSVC_VERSION LESS 1900)
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
else ()
find_path(CODECVT_INCLUDE_DIR NAMES "codecvt")
if (CODECVT_INCLUDE_DIR)
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
endif ()
if(${UNICODE_LIBRARY} MATCHES "icu")
find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED)
add_definitions(-DUSE_ICU4C)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
elseif(${UNICODE_LIBRARY} MATCHES "codecvt")
if(NOT CODECVT_SUPPORTED)
message(SEND_ERROR "codecvt header not found")
endif()
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
# NOTE(ww): On Windows we use the Win32 API's built-in UTF16 conversion
# routines; on other platforms we use codecvt. codecvt is nominally deprecated
# in C++17 and onwards, but will probably be available for quite some time.
# Previous versions of pe-parse used ICU when available, but this caused
# DLL hell on Windows and wasn't worth the additional dependency.
if(MSVC)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_winapi.cpp)
else()
find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc)
if(ICU_FOUND)
add_definitions(-DUSE_ICU4C)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
elseif(CODECVT_SUPPORTED)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
else()
message(SEND_ERROR "unable to find codecvt header or ICU library (hint: try installing libicu-dev)")
endif(ICU_FOUND)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
endif()
add_library(${PROJECT_NAME} ${PEPARSERLIB_SOURCEFILES})
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
if(ICU_FOUND)
target_link_libraries(${PROJECT_NAME} ICU::uc)
endif()
install(TARGETS ${PROJECT_NAME}
RUNTIME DESTINATION "bin"
LIBRARY DESTINATION "lib"
ARCHIVE DESTINATION "lib"
if(PEPARSE_LIBRARY_WARNINGS)
target_compile_definitions(${PROJECT_NAME} PRIVATE PEPARSE_LIBRARY_WARNINGS=1)
endif ()
target_include_directories(
${PROJECT_NAME}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
)
install(FILES "cmake/peparse-config.cmake" DESTINATION "lib/cmake/peparse")
install(DIRECTORY "include/parser-library" DESTINATION "include")
target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
install(
TARGETS ${PROJECT_NAME}
EXPORT pe-parse-config
RUNTIME
DESTINATION "bin"
LIBRARY
DESTINATION "lib"
ARCHIVE
DESTINATION "lib"
)
export(
TARGETS ${PROJECT_NAME}
NAMESPACE pe-parse::
FILE "${CMAKE_CURRENT_BINARY_DIR}/pe-parse-config.cmake"
)
install(
EXPORT
pe-parse-config
DESTINATION "lib/cmake/pe-parse"
NAMESPACE pe-parse::
EXPORT_LINK_INTERFACE_LIBRARIES
)
install(DIRECTORY "include/pe-parse" DESTINATION "include")

View File

@ -0,0 +1,5 @@
find_path(PEPARSE_INCLUDE_DIR "pe-parse/parse.h")
find_library(PEPARSE_LIBRARIES NAMES "libpe-parse" "pe-parse")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(pe-parse DEFAULT_MSG PEPARSE_INCLUDE_DIR PEPARSE_LIBRARIES)

View File

@ -1,9 +0,0 @@
if(CMAKE_CROSSCOMPILING)
find_path(PEPARSE_INCLUDE_DIR "parser-library/parse.h")
else()
find_path(PEPARSE_INCLUDE_DIR $<SHELL_PATH:"parser-library/parse.h">)
endif()
find_library(PEPARSE_LIBRARIES NAMES "libpe-parser-library" "pe-parser-library")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(peparse DEFAULT_MSG PEPARSE_INCLUDE_DIR PEPARSE_LIBRARIES)

View File

@ -28,10 +28,6 @@ THE SOFTWARE.
#include <string>
#include <vector>
#define _offset(t, f) \
static_cast<std::uint32_t>( \
reinterpret_cast<std::ptrdiff_t>(&static_cast<t *>(nullptr)->f))
// need to pack these structure definitions
// some constant definitions
@ -210,6 +206,19 @@ constexpr std::uint8_t IMAGE_SYM_CLASS_FILE = 103;
constexpr std::uint8_t IMAGE_SYM_CLASS_SECTION = 104;
constexpr std::uint8_t IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105;
constexpr std::uint8_t IMAGE_SYM_CLASS_CLR_TOKEN = 107;
// Optional header DLL characteristics
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE = 0x0040;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY = 0x0080;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NX_COMPAT = 0x0100;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = 0x0200;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_SEH = 0x0400;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_BIND = 0x0800;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_APPCONTAINER = 0x1000;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = 0x2000;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_GUARD_CF = 0x4000;
constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000;
#endif
// clang-format on

View File

@ -32,7 +32,7 @@ THE SOFTWARE.
#include "to_string.h"
#ifdef _MSC_VER
#define __typeof__(x) std::remove_reference < decltype(x)> ::type
#define __typeof__(x) std::remove_reference<decltype(x)>::type
#endif
#define PE_ERR(x) \
@ -40,28 +40,28 @@ THE SOFTWARE.
err_loc.assign(__func__); \
err_loc += ":" + to_string<std::uint32_t>(__LINE__, std::dec);
#define READ_WORD(b, o, inst, member) \
if (!readWord(b, o + _offset(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
}
#define READ_DWORD(b, o, inst, member) \
if (!readDword(b, o + _offset(__typeof__(inst), member), inst.member)) { \
#define READ_WORD(b, o, inst, member) \
if (!readWord(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
}
#define READ_QWORD(b, o, inst, member) \
if (!readQword(b, o + _offset(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
#define READ_DWORD(b, o, inst, member) \
if (!readDword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
}
#define READ_BYTE(b, o, inst, member) \
if (!readByte(b, o + _offset(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
#define READ_QWORD(b, o, inst, member) \
if (!readQword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
}
#define READ_BYTE(b, o, inst, member) \
if (!readByte(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
PE_ERR(PEERR_READ); \
return false; \
}
#define TEST_MACHINE_CHARACTERISTICS(h, m, ch) \
@ -149,6 +149,7 @@ bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out);
bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out);
bounded_buffer *readFileToFileBuffer(const char *filePath);
bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz);
bounded_buffer *
splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to);
void deleteBuffer(bounded_buffer *b);
@ -186,42 +187,54 @@ std::string GetPEErrLoc();
// get a PE parse context from a file
parsed_pe *ParsePEFromFile(const char *filePath);
parsed_pe *ParsePEFromPointer(std::uint8_t *buffer, std::uint32_t sz);
parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer);
// destruct a PE context
void DestructParsedPE(parsed_pe *p);
// iterate over Rich header entries
typedef int (*iterRich)(void *, rich_entry);
typedef int (*iterRich)(void *, const rich_entry &);
void IterRich(parsed_pe *pe, iterRich cb, void *cbd);
// iterate over the resources
typedef int (*iterRsrc)(void *, resource);
typedef int (*iterRsrc)(void *, const resource &);
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);
// iterate over the imports by RVA and string
typedef int (*iterVAStr)(void *, VA, const std::string &, const std::string &);
typedef int (*iterVAStr)(void *,
const VA &,
const std::string &,
const std::string &);
void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd);
// iterate over relocations in the PE file
typedef int (*iterReloc)(void *, VA, reloc_type);
typedef int (*iterReloc)(void *, const VA &, const reloc_type &);
void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd);
// Iterate over symbols (symbol table) in the PE file
typedef int (*iterSymbol)(void *,
std::string &,
std::uint32_t &,
std::int16_t &,
std::uint16_t &,
std::uint8_t &,
std::uint8_t &);
const std::string &,
const std::uint32_t &,
const std::int16_t &,
const std::uint16_t &,
const std::uint8_t &,
const std::uint8_t &);
void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd);
// iterate over the exports
typedef int (*iterExp)(void *, VA, std::string &, std::string &);
typedef int (*iterExp)(void *,
const VA &,
const std::string &,
const std::string &);
void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd);
// iterate over sections
typedef int (*iterSec)(
void *, VA secBase, std::string &, image_section_header, bounded_buffer *b);
typedef int (*iterSec)(void *,
const VA &,
const std::string &,
const image_section_header &,
const bounded_buffer *);
void IterSec(parsed_pe *pe, iterSec cb, void *cbd);
// get byte at VA in PE

View File

@ -3,9 +3,8 @@
#include <sstream>
#include <string>
#ifdef USE_ICU4C
#include <unicode/unistr.h>
typedef std::basic_string<UChar> UCharString;
#if defined(_MSC_VER)
typedef std::basic_string<wchar_t> UCharString;
#else
typedef std::u16string UCharString;
#endif

View File

@ -26,7 +26,7 @@ THE SOFTWARE.
#include <fstream>
// keep this header above "windows.h" because it contains many types
#include <parser-library/parse.h>
#include <pe-parse/parse.h>
#ifdef _WIN32
@ -298,6 +298,28 @@ bounded_buffer *readFileToFileBuffer(const char *filePath) {
return p;
}
// Wrap an existing block of memory in a bounded_buffer.
//
// data: start of the caller's memory; a null pointer is rejected.
// sz:   number of bytes at `data`.
//
// Returns a newly allocated bounded_buffer whose buf/bufLen refer to the
// caller's memory (the bytes themselves are not duplicated here), or nullptr
// with PEERR_MEM recorded when `data` is null or the allocation fails.
bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz) {
  if (nullptr == data) {
    PE_ERR(PEERR_MEM);
    return nullptr;
  }

  // std::nothrow: an allocation failure yields nullptr instead of throwing.
  auto *wrapped = new (std::nothrow) bounded_buffer();
  if (nullptr == wrapped) {
    PE_ERR(PEERR_MEM);
    return nullptr;
  }

  wrapped->buf = data;
  wrapped->bufLen = sz;
  // NOTE(review): copy = true presumably marks the memory as not owned by this
  // buffer so that deleteBuffer leaves it alone -- confirm against deleteBuffer.
  wrapped->copy = true;
  wrapped->swapBytes = false;
  wrapped->detail = nullptr;

  return wrapped;
}
// split buffer inclusively from from to to by offset
bounded_buffer *
splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to) {

View File

@ -24,14 +24,15 @@ THE SOFTWARE.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <parser-library/nt-headers.h>
#include <parser-library/parse.h>
#include <parser-library/to_string.h>
#include <pe-parse/nt-headers.h>
#include <pe-parse/parse.h>
#include <pe-parse/to_string.h>
namespace peparse {
@ -597,7 +598,7 @@ bool getSecForVA(const std::vector<section> &secs, VA v, section &sec) {
}
void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
for (rich_entry r : pe->peHeader.rich.Entries) {
for (rich_entry &r : pe->peHeader.rich.Entries) {
if (cb(cbd, r) != 0) {
break;
}
@ -607,13 +608,11 @@ void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
parsed_pe_internal *pint = pe->internal;
for (resource r : pint->rsrcs) {
for (const resource &r : pint->rsrcs) {
if (cb(cbd, r) != 0) {
break;
}
}
return;
}
bool parse_resource_id(bounded_buffer *data,
@ -672,7 +671,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
rde = new resource_dir_entry;
}
if (!readDword(sectionData, o + _offset(__typeof__(*rde), ID), rde->ID)) {
if (!readDword(sectionData, o + offsetof(__typeof__(*rde), ID), rde->ID)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
delete rde;
@ -680,7 +679,8 @@ bool parse_resource_table(bounded_buffer *sectionData,
return false;
}
if (!readDword(sectionData, o + _offset(__typeof__(*rde), RVA), rde->RVA)) {
if (!readDword(
sectionData, o + offsetof(__typeof__(*rde), RVA), rde->RVA)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
delete rde;
@ -761,7 +761,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
*/
if (!readDword(sectionData,
rde->RVA + _offset(__typeof__(rdat), RVA),
rde->RVA + offsetof(__typeof__(rdat), RVA),
rdat.RVA)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
@ -771,7 +771,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
}
if (!readDword(sectionData,
rde->RVA + _offset(__typeof__(rdat), size),
rde->RVA + offsetof(__typeof__(rdat), size),
rdat.size)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
@ -781,7 +781,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
}
if (!readDword(sectionData,
rde->RVA + _offset(__typeof__(rdat), codepage),
rde->RVA + offsetof(__typeof__(rdat), codepage),
rdat.codepage)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
@ -791,7 +791,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
}
if (!readDword(sectionData,
rde->RVA + _offset(__typeof__(rdat), reserved),
rde->RVA + offsetof(__typeof__(rdat), reserved),
rdat.reserved)) {
PE_ERR(PEERR_READ);
if (dirent == nullptr) {
@ -937,9 +937,22 @@ bool getSections(bounded_buffer *b,
std::uint32_t highOff = lowOff + curSec.SizeOfRawData;
thisSec.sectionData = splitBuffer(fileBegin, lowOff, highOff);
// GH#109: we trusted [lowOff, highOff) to be a range that yields
// a valid bounded_buffer, despite these being user-controllable.
// splitBuffer correctly handles this, but we failed to check for
// the nullptr it returns as a sentinel.
if (thisSec.sectionData == nullptr) {
return false;
}
secs.push_back(thisSec);
}
std::sort(
secs.begin(), secs.end(), [](const section &lhs, const section &rhs) {
return lhs.sec.PointerToRawData < rhs.sec.PointerToRawData;
});
return true;
}
@ -982,15 +995,15 @@ bool readOptionalHeader(bounded_buffer *b, optional_header_32 &header) {
for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) {
std::uint32_t c = (i * sizeof(data_directory));
c += _offset(optional_header_32, DataDirectory[0]);
c += offsetof(optional_header_32, DataDirectory[0]);
std::uint32_t o;
o = c + _offset(data_directory, VirtualAddress);
o = c + offsetof(data_directory, VirtualAddress);
if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) {
return false;
}
o = c + _offset(data_directory, Size);
o = c + offsetof(data_directory, Size);
if (!readDword(b, o, header.DataDirectory[i].Size)) {
return false;
}
@ -1037,15 +1050,15 @@ bool readOptionalHeader64(bounded_buffer *b, optional_header_64 &header) {
for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) {
std::uint32_t c = (i * sizeof(data_directory));
c += _offset(optional_header_64, DataDirectory[0]);
c += offsetof(optional_header_64, DataDirectory[0]);
std::uint32_t o;
o = c + _offset(data_directory, VirtualAddress);
o = c + offsetof(data_directory, VirtualAddress);
if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) {
return false;
}
o = c + _offset(data_directory, Size);
o = c + offsetof(data_directory, Size);
if (!readDword(b, o, header.DataDirectory[i].Size)) {
return false;
}
@ -1080,7 +1093,7 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
header.Signature = pe_magic;
bounded_buffer *fhb =
splitBuffer(b, _offset(nt_header_32, FileHeader), b->bufLen);
splitBuffer(b, offsetof(nt_header_32, FileHeader), b->bufLen);
if (fhb == nullptr) {
PE_ERR(PEERR_MEM);
@ -1119,7 +1132,7 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
* buffer regardless.
*/
bounded_buffer *ohb =
splitBuffer(b, _offset(nt_header_32, OptionalHeader), b->bufLen);
splitBuffer(b, offsetof(nt_header_32, OptionalHeader), b->bufLen);
if (ohb == nullptr) {
deleteBuffer(fhb);
@ -1166,7 +1179,17 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
// zero-extends its first argument to 32 bits and then rotates those bits
// left by the number of positions given by its second argument
static inline std::uint32_t rol(std::uint32_t val, std::uint32_t num) {
return ((val << num) & 0xffffffff) | (val >> (32 - num));
assert(num < 32);
// Disable MSVC warning for unary minus operator applied to unsigned type
#if defined(_MSC_VER) || defined(_MSC_FULL_VER)
#pragma warning(push)
#pragma warning(disable : 4146)
#endif
// https://blog.regehr.org/archives/1063
return (val << num) | (val >> (-num & 31));
#if defined(_MSC_VER) || defined(_MSC_FULL_VER)
#pragma warning(pop)
#endif
}
std::uint32_t calculateRichChecksum(const bounded_buffer *b, pe_header &p) {
@ -1182,7 +1205,7 @@ std::uint32_t calculateRichChecksum(const bounded_buffer *b, pe_header &p) {
if (i >= 0x3C && i <= 0x3F) {
continue;
}
checksum += rol(b->buf[i], i);
checksum += rol(b->buf[i], i & 0x1F);
}
// Next, take summation of each Rich header entry by combining its ProductId
@ -1467,7 +1490,7 @@ bool getExports(parsed_pe *p) {
// get the name of this module
std::uint32_t nameRva;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, NameRVA),
rvaofft + offsetof(export_dir_table, NameRVA),
nameRva)) {
return false;
}
@ -1495,7 +1518,7 @@ bool getExports(parsed_pe *p) {
// now, get all the named export symbols
std::uint32_t numNames;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, NumberOfNamePointers),
rvaofft + offsetof(export_dir_table, NumberOfNamePointers),
numNames)) {
return false;
}
@ -1504,7 +1527,7 @@ bool getExports(parsed_pe *p) {
// get the names section
std::uint32_t namesRVA;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, NamePointerRVA),
rvaofft + offsetof(export_dir_table, NamePointerRVA),
namesRVA)) {
return false;
}
@ -1529,7 +1552,8 @@ bool getExports(parsed_pe *p) {
// get the EAT section
std::uint32_t eatRVA;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, ExportAddressTableRVA),
rvaofft +
offsetof(export_dir_table, ExportAddressTableRVA),
eatRVA)) {
return false;
}
@ -1553,7 +1577,7 @@ bool getExports(parsed_pe *p) {
// get the ordinal base
std::uint32_t ordinalBase;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, OrdinalBase),
rvaofft + offsetof(export_dir_table, OrdinalBase),
ordinalBase)) {
return false;
}
@ -1561,7 +1585,7 @@ bool getExports(parsed_pe *p) {
// get the ordinal table
std::uint32_t ordinalTableRVA;
if (!readDword(s.sectionData,
rvaofft + _offset(export_dir_table, OrdinalTableRVA),
rvaofft + offsetof(export_dir_table, OrdinalTableRVA),
ordinalTableRVA)) {
return false;
}
@ -1704,13 +1728,13 @@ bool getRelocations(parsed_pe *p) {
std::uint32_t blockSize;
if (!readDword(d.sectionData,
rvaofft + _offset(reloc_block, PageRVA),
rvaofft + offsetof(reloc_block, PageRVA),
pageRva)) {
return false;
}
if (!readDword(d.sectionData,
rvaofft + _offset(reloc_block, BlockSize),
rvaofft + offsetof(reloc_block, BlockSize),
blockSize)) {
return false;
}
@ -2294,31 +2318,36 @@ bool getSymbolTable(parsed_pe *p) {
}
} else {
// std::ios::fmtflags originalStreamFlags(std::cerr.flags());
#ifdef PEPARSE_LIBRARY_WARNINGS
std::ios::fmtflags originalStreamFlags(std::cerr.flags());
// auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass);
// if (storageClassName == nullptr) {
// std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex
// << static_cast<std::uint32_t>(sym.storageClass)
// << " at offset 0x" << std::hex << offset << "\n";
// } else {
// std::cerr << "Warning: Skipping auxiliary symbol of type "
// << storageClassName << " at offset 0x" << std::hex << offset
// << "\n";
// }
auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass);
if (storageClassName == nullptr) {
std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex
<< static_cast<std::uint32_t>(sym.storageClass)
<< " at offset 0x" << std::hex << offset << "\n";
} else {
// std::cerr.flags(originalStreamFlags);
std::cerr << "Warning: Skipping auxiliary symbol of type "
<< storageClassName << " at offset 0x" << std::hex << offset
<< "\n";
}
std::cerr.flags(originalStreamFlags);
#endif
offset = nextSymbolOffset;
}
if (offset != nextSymbolOffset) {
// std::ios::fmtflags originalStreamFlags(std::cerr.flags());
#ifdef PEPARSE_LIBRARY_WARNINGS
std::ios::fmtflags originalStreamFlags(std::cerr.flags());
// std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex
// << offset << ", expected: 0x" << std::hex << nextSymbolOffset
// << ")\n";
std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex
<< offset << ", expected: 0x" << std::hex << nextSymbolOffset
<< ")\n";
// std::cerr.flags(originalStreamFlags);
std::cerr.flags(originalStreamFlags);
#endif
offset = nextSymbolOffset;
}
}
@ -2326,7 +2355,7 @@ bool getSymbolTable(parsed_pe *p) {
return true;
}
parsed_pe *ParsePEFromFile(const char *filePath) {
parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer) {
// First, create a new parsed_pe structure
// We pass std::nothrow parameter to new so in case of failure it returns
// nullptr instead of throwing exception std::bad_alloc.
@ -2338,13 +2367,7 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
}
// Make a new buffer object to hold just our file data
p->fileBuffer = readFileToFileBuffer(filePath);
if (p->fileBuffer == nullptr) {
delete p;
// err is set by readFileToFileBuffer
return nullptr;
}
p->fileBuffer = buffer;
p->internal = new (std::nothrow) parsed_pe_internal();
@ -2420,6 +2443,28 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
return p;
}
// Parse a PE from a file on disk.
// Reads the file into a bounded_buffer via readFileToFileBuffer and hands that
// buffer to ParsePEFromBuffer. Returns nullptr if the file could not be read
// (the error is recorded by readFileToFileBuffer) or whatever
// ParsePEFromBuffer returns otherwise.
parsed_pe *ParsePEFromFile(const char *filePath) {
  bounded_buffer *fileData = readFileToFileBuffer(filePath);

  // err is set by readFileToFileBuffer when fileData is null
  return (fileData != nullptr) ? ParsePEFromBuffer(fileData) : nullptr;
}
// Parse a PE that is already in memory.
// Wraps [ptr, ptr + sz) in a bounded_buffer via makeBufferFromPointer and hands
// that buffer to ParsePEFromBuffer. Returns nullptr if the buffer could not be
// created (the error is recorded by makeBufferFromPointer) or whatever
// ParsePEFromBuffer returns otherwise.
parsed_pe *ParsePEFromPointer(std::uint8_t *ptr, std::uint32_t sz) {
  bounded_buffer *memData = makeBufferFromPointer(ptr, sz);

  // err is set by makeBufferFromPointer when memData is null
  return (memData != nullptr) ? ParsePEFromBuffer(memData) : nullptr;
}
void DestructParsedPE(parsed_pe *p) {
if (p == nullptr) {
return;
@ -2447,7 +2492,7 @@ void DestructParsedPE(parsed_pe *p) {
void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) {
std::vector<importent> &l = pe->internal->imports;
for (importent i : l) {
for (importent &i : l) {
if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) {
break;
}
@ -2460,7 +2505,7 @@ void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) {
void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) {
std::vector<reloc> &l = pe->internal->relocs;
for (reloc r : l) {
for (reloc &r : l) {
if (cb(cbd, r.shiftedAddr, r.type) != 0) {
break;
}
@ -2473,7 +2518,7 @@ void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) {
void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) {
std::vector<symbol> &l = pe->internal->symbols;
for (symbol s : l) {
for (symbol &s : l) {
if (cb(cbd,
s.strName,
s.value,
@ -2492,7 +2537,7 @@ void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) {
void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) {
std::vector<exportent> &l = pe->internal->exports;
for (exportent i : l) {
for (exportent &i : l) {
if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) {
break;
}
@ -2505,7 +2550,7 @@ void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) {
void IterSec(parsed_pe *pe, iterSec cb, void *cbd) {
parsed_pe_internal *pint = pe->internal;
for (section s : pint->secs) {
for (section &s : pint->secs) {
if (cb(cbd, s.sectionBase, s.sectionName, s.sec, s.sectionData) != 0) {
break;
}
@ -2647,20 +2692,38 @@ bool GetDataDirectoryEntry(parsed_pe *pe,
return false;
}
section sec;
if (!getSecForVA(pe->internal->secs, addr, sec)) {
PE_ERR(PEERR_SECTVA);
return false;
}
/* NOTE(ww): DIR_SECURITY is an annoying special case: its contents
* are never mapped into memory, so its "RVA" is actually a direct
* file offset.
* See:
* https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only
*/
if (dirnum == DIR_SECURITY) {
auto *buf = splitBuffer(
pe->fileBuffer, dir.VirtualAddress, dir.VirtualAddress + dir.Size);
if (buf == nullptr) {
PE_ERR(PEERR_SIZE);
return false;
}
auto off = static_cast<std::uint32_t>(addr - sec.sectionBase);
if (off + dir.Size >= sec.sectionData->bufLen) {
PE_ERR(PEERR_SIZE);
return false;
}
raw_entry.assign(buf->buf, buf->buf + buf->bufLen);
deleteBuffer(buf);
} else {
section sec;
if (!getSecForVA(pe->internal->secs, addr, sec)) {
PE_ERR(PEERR_SECTVA);
return false;
}
raw_entry.assign(sec.sectionData->buf + off,
sec.sectionData->buf + off + dir.Size);
auto off = static_cast<std::uint32_t>(addr - sec.sectionBase);
if (off + dir.Size >= sec.sectionData->bufLen) {
PE_ERR(PEERR_SIZE);
return false;
}
raw_entry.assign(sec.sectionData->buf + off,
sec.sectionData->buf + off + dir.Size);
}
return true;
}

View File

@ -24,25 +24,13 @@ THE SOFTWARE.
#include <codecvt>
#include <locale>
#include <parser-library/to_string.h>
#include <pe-parse/to_string.h>
namespace peparse {
// See
// https://stackoverflow.com/questions/38688417/utf-conversion-functions-in-c11
std::string from_utf16(const UCharString &u) {
#if defined(_MSC_VER)
// std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>convert;
// // Doesn't compile with Visual Studio. See
// https://stackoverflow.com/questions/32055357/visual-studio-c-2015-stdcodecvt-with-char16-t-or-char32-t
std::wstring_convert<std::codecvt_utf8<std::int16_t>, std::int16_t> convert;
auto p = reinterpret_cast<const std::int16_t *>(u.data());
return convert.to_bytes(p, p + u.size());
#else
// -std=c++11 or -std=c++14
// Requires GCC 5 or higher
// Requires Clang ??? or higher (tested on Clang 3.8, 5.0, 6.0)
std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert;
return convert.to_bytes(u);
#endif
}
} // namespace peparse

View File

@ -1,7 +1,7 @@
/*
The MIT License (MIT)
Copyright (c) 2019 Trail of Bits, Inc.
Copyright (c) 2020 Trail of Bits, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -22,14 +22,35 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <parser-library/to_string.h>
#include <unicode/unistr.h>
#include <Windows.h>
#include <pe-parse/to_string.h>
namespace peparse {
// Convert a UTF-16 string to UTF-8 using the Win32 API.
//
// u: the UTF-16 code units to convert.
//
// Returns the UTF-8 encoding of `u`, or an empty string when
// WideCharToMultiByte reports no output (which includes conversion failure).
std::string from_utf16(const UCharString &u) {
  std::string result;

  // Sizing pass: with a null output buffer and an output length of 0,
  // WideCharToMultiByte returns the number of bytes the conversion needs.
  const int size = WideCharToMultiByte(CP_UTF8,
                                       0,
                                       u.data(),
                                       static_cast<int>(u.size()),
                                       nullptr,
                                       0,
                                       nullptr,
                                       nullptr);

  if (size <= 0) {
    return result;
  }

  // resize(), not reserve(): reserve() only grows the capacity and leaves
  // size() at 0, so bytes written through &result[0] would lie outside the
  // string's valid range and the caller would always get an empty string.
  result.resize(static_cast<std::size_t>(size));

  const int written = WideCharToMultiByte(CP_UTF8,
                                          0,
                                          u.data(),
                                          static_cast<int>(u.size()),
                                          &result[0],
                                          size,
                                          nullptr,
                                          nullptr);

  // Do not hand back a buffer of unspecified bytes if the second pass fails.
  if (written <= 0) {
    result.clear();
  }

  return result;
}
} // namespace peparse

204
pepy/README.md Normal file
View File

@ -0,0 +1,204 @@
pepy
====
pepy (pronounced p-pie) is a python binding to the pe-parse parser.
pepy supports Python versions 3.6 and above.
The easiest way to use pepy is to install it via pip:
```bash
$ pip3 install pepy
```
## Building
If you can build pe-parse and have a working python environment (headers and
libraries) you can build pepy.
1. Build pepy:
* `python3 setup.py build`
2. Install pepy:
* `python3 setup.py install`
**Building on Windows:** Python 3.x is typically installed as _python.exe_,
**NOT** _python3.exe_.
## Using
### Parsed object
There are a number of objects involved in pepy. The main one is the **parsed**
object. This object is returned by the *parse* method.
```python
import pepy
p = pepy.parse("/path/to/exe")
```
The **parsed** object has a number of methods:
* `get_entry_point`: Return the entry point address
* `get_machine_as_str`: Return the machine as a human readable string
* `get_subsystem_as_str`: Return the subsystem as a human readable string
* `get_bytes`: Return the first N bytes at a given address
* `get_sections`: Return a list of section objects
* `get_imports`: Return a list of import objects
* `get_exports`: Return a list of export objects
* `get_relocations`: Return a list of relocation objects
* `get_resources`: Return a list of resource objects
The **parsed** object has a number of attributes:
* `signature`
* `machine`
* `numberofsections`
* `timedatestamp`
* `numberofsymbols`
* `characteristics`
* `magic`
* `majorlinkerver`
* `minorlinkerver`
* `codesize`
* `initdatasize`
* `uninitdatasize`
* `entrypointaddr`
* `baseofcode`
* `baseofdata`
* `imagebase`
* `sectionalignement`
* `filealignment`
* `majorosver`
* `minorosver`
* `win32ver`
* `imagesize`
* `headersize`
* `checksum`
* `subsystem`
* `dllcharacteristics`
* `stackreservesize`
* `stackcommitsize`
* `heapreservesize`
* `heapcommitsize`
* `loaderflags`
* `rvasandsize`
Example:
```python
import time
import pepy
p = pepy.parse("/path/to/exe")
print("Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)))
ep = p.get_entry_point()
print("Entry point: 0x%x" % ep)
```
The `get_sections`, `get_imports`, `get_exports`, `get_relocations` and
`get_resources` methods each return a list of objects. The type of object
depends upon the method called. `get_sections` returns a list of `section`
objects, `get_imports` returns a list of `import` objects, etc.
### Section Object
The `section` object has the following attributes:
* `base`
* `length`
* `virtaddr`
* `virtsize`
* `numrelocs`
* `numlinenums`
* `characteristics`
* `data`
### Import Object
The `import` object has the following attributes:
* `sym`
* `name`
* `addr`
### Export Object
The `export` object has the following attributes:
* `mod`
* `func`
* `addr`
### Relocation Object
The `relocation` object has the following attributes:
* `type`
* `addr`
### Resource Object
The `resource` object has the following attributes:
* `type_str`
* `name_str`
* `lang_str`
* `type`
* `name`
* `lang`
* `codepage`
* `RVA`
* `size`
* `data`
The `resource` object has the following methods:
* `type_as_str`
Resources are stored in a directory structure. The first three levels of the
directory are called `type`, `name` and `lang`. Each of these levels can have
either a pre-defined value or a custom string. The pre-defined values are
stored in the `type`, `name` and `lang` attributes. If a custom string is
found it will be stored in the `type_str`, `name_str` and `lang_str`
attributes. The `type_as_str` method can be used to convert a pre-defined
type value to a string representation.
The following code shows how to iterate through resources:
```python
import pepy
from hashlib import md5
import sys

# Parse the PE file named on the command line and list its resources.
p = pepy.parse(sys.argv[1])
resources = p.get_resources()
print("Resources: (%i)" % len(resources))
for resource in resources:
    print("[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest()))
    # For each level, a custom string (if present) is shown in place of the
    # pre-defined numeric value.
    if resource.type_str:
        print("\tType string: %s" % resource.type_str)
    else:
        print("\tType: %s (%s)" % (hex(resource.type), resource.type_as_str()))
    if resource.name_str:
        print("\tName string: %s" % resource.name_str)
    else:
        print("\tName: %s" % hex(resource.name))
    if resource.lang_str:
        print("\tLang string: %s" % resource.lang_str)
    else:
        print("\tLang: %s" % hex(resource.lang))
    print("\tCodepage: %s" % hex(resource.codepage))
    print("\tRVA: %s" % hex(resource.RVA))
    print("\tSize: %s" % hex(resource.size))
```
Note that some binaries (particularly packed) may have corrupt resource entries.
In these cases you may find that `len(resource.data)` is 0 but `resource.size` is
greater than 0. The `size` attribute is the size of the data as declared by the
resource data entry.
## Authors
pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com).
pepy was written by Wesley Shields (wxs@atarininja.org).

View File

@ -26,31 +26,14 @@
*/
#include <Python.h>
#include <parser-library/parse.h>
#include <pe-parse/parse.h>
#include <structmember.h>
using namespace peparse;
#define PEPY_VERSION "0.3"
/*
* Add some definition for compatibility between python2 and python3
/* NOTE(ww): These don't necessarily have to be the same, but currently are.
*/
#if PY_MAJOR_VERSION >= 3
#define PyString_FromString PyUnicode_FromString
#endif
/*
* Some macro only available after python 2.6
* Needed for compatibility with python3
*/
#ifndef PyVarObject_HEAD_INIT
#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
#endif
#ifndef Py_TYPE
#define Py_TYPE(_ob_) (((PyObject *) (_ob_))->ob_type)
#endif
#define PEPY_VERSION PEPARSE_VERSION
/* These are used across multiple objects. */
#define PEPY_OBJECT_GET(OBJ, ATTR) \
@ -154,9 +137,9 @@ static void pepy_import_dealloc(pepy_import *self) {
Py_TYPE(self)->tp_free((PyObject *) self);
}
PEPY_OBJECT_GET(import, name)
PEPY_OBJECT_GET(import, sym)
PEPY_OBJECT_GET(import, addr)
PEPY_OBJECT_GET(import, name);
PEPY_OBJECT_GET(import, sym);
PEPY_OBJECT_GET(import, addr);
static PyGetSetDef pepy_import_getseters[] = {
OBJECTGETTER(import, name, "Name"),
@ -228,9 +211,9 @@ static void pepy_export_dealloc(pepy_export *self) {
Py_TYPE(self)->tp_free((PyObject *) self);
}
PEPY_OBJECT_GET(export, mod)
PEPY_OBJECT_GET(export, func)
PEPY_OBJECT_GET(export, addr)
PEPY_OBJECT_GET(export, mod);
PEPY_OBJECT_GET(export, func);
PEPY_OBJECT_GET(export, addr);
static PyGetSetDef pepy_export_getseters[] = {
OBJECTGETTER(export, mod, "Module"),
@ -302,8 +285,8 @@ static void pepy_relocation_dealloc(pepy_relocation *self) {
Py_TYPE(self)->tp_free((PyObject *) self);
}
PEPY_OBJECT_GET(relocation, type)
PEPY_OBJECT_GET(relocation, addr)
PEPY_OBJECT_GET(relocation, type);
PEPY_OBJECT_GET(relocation, addr);
static PyGetSetDef pepy_relocation_getseters[] = {
OBJECTGETTER(relocation, type, "Type"),
@ -390,15 +373,15 @@ static void pepy_section_dealloc(pepy_section *self) {
Py_TYPE(self)->tp_free((PyObject *) self);
}
PEPY_OBJECT_GET(section, name)
PEPY_OBJECT_GET(section, base)
PEPY_OBJECT_GET(section, length)
PEPY_OBJECT_GET(section, virtaddr)
PEPY_OBJECT_GET(section, virtsize)
PEPY_OBJECT_GET(section, numrelocs)
PEPY_OBJECT_GET(section, numlinenums)
PEPY_OBJECT_GET(section, characteristics)
PEPY_OBJECT_GET(section, data)
PEPY_OBJECT_GET(section, name);
PEPY_OBJECT_GET(section, base);
PEPY_OBJECT_GET(section, length);
PEPY_OBJECT_GET(section, virtaddr);
PEPY_OBJECT_GET(section, virtsize);
PEPY_OBJECT_GET(section, numrelocs);
PEPY_OBJECT_GET(section, numlinenums);
PEPY_OBJECT_GET(section, characteristics);
PEPY_OBJECT_GET(section, data);
static PyGetSetDef pepy_section_getseters[] = {
OBJECTGETTER(section, name, "Name"),
@ -495,16 +478,16 @@ static void pepy_resource_dealloc(pepy_resource *self) {
Py_TYPE(self)->tp_free((PyObject *) self);
}
PEPY_OBJECT_GET(resource, type_str)
PEPY_OBJECT_GET(resource, name_str)
PEPY_OBJECT_GET(resource, lang_str)
PEPY_OBJECT_GET(resource, type)
PEPY_OBJECT_GET(resource, name)
PEPY_OBJECT_GET(resource, lang)
PEPY_OBJECT_GET(resource, codepage)
PEPY_OBJECT_GET(resource, RVA)
PEPY_OBJECT_GET(resource, size)
PEPY_OBJECT_GET(resource, data)
PEPY_OBJECT_GET(resource, type_str);
PEPY_OBJECT_GET(resource, name_str);
PEPY_OBJECT_GET(resource, lang_str);
PEPY_OBJECT_GET(resource, type);
PEPY_OBJECT_GET(resource, name);
PEPY_OBJECT_GET(resource, lang);
PEPY_OBJECT_GET(resource, codepage);
PEPY_OBJECT_GET(resource, RVA);
PEPY_OBJECT_GET(resource, size);
PEPY_OBJECT_GET(resource, data);
static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
PyObject *ret;
@ -587,7 +570,7 @@ static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
break;
}
ret = PyString_FromString(str);
ret = PyUnicode_FromString(str);
if (!ret) {
PyErr_SetString(pepy_error, "Unable to create return string.");
return NULL;
@ -713,7 +696,7 @@ static PyObject *pepy_parsed_get_machine_as_str(PyObject *self,
if (!str)
Py_RETURN_NONE;
ret = PyString_FromString(str);
ret = PyUnicode_FromString(str);
if (!ret) {
PyErr_SetString(pepy_error, "Unable to create return string.");
return NULL;
@ -731,7 +714,7 @@ static PyObject *pepy_parsed_get_subsystem_as_str(PyObject *self,
if (!str)
Py_RETURN_NONE;
ret = PyString_FromString(str);
ret = PyUnicode_FromString(str);
if (!ret) {
PyErr_SetString(pepy_error, "Unable to create return string.");
return NULL;
@ -804,10 +787,10 @@ static PyObject *pepy_data_converter(bounded_buffer *data) {
}
int section_callback(void *cbd,
VA base,
std::string &name,
image_section_header s,
bounded_buffer *data) {
const VA &base,
const std::string &name,
const image_section_header &s,
const bounded_buffer *data) {
uint32_t buflen;
PyObject *sect;
PyObject *tuple;
@ -878,7 +861,7 @@ static PyObject *pepy_parsed_get_sections(PyObject *self, PyObject *args) {
return ret;
}
int resource_callback(void *cbd, resource r) {
int resource_callback(void *cbd, const resource &r) {
PyObject *rsrc;
PyObject *tuple;
PyObject *list = (PyObject *) cbd;
@ -938,7 +921,7 @@ static PyObject *pepy_parsed_get_resources(PyObject *self, PyObject *args) {
}
int import_callback(void *cbd,
VA addr,
const VA &addr,
const std::string &name,
const std::string &sym) {
PyObject *imp;
@ -985,7 +968,10 @@ static PyObject *pepy_parsed_get_imports(PyObject *self, PyObject *args) {
return ret;
}
int export_callback(void *cbd, VA addr, std::string &mod, std::string &func) {
int export_callback(void *cbd,
const VA &addr,
const std::string &mod,
const std::string &func) {
PyObject *exp;
PyObject *tuple;
PyObject *list = (PyObject *) cbd;
@ -1034,7 +1020,7 @@ static PyObject *pepy_parsed_get_exports(PyObject *self, PyObject *args) {
return ret;
}
int reloc_callback(void *cbd, VA addr, reloc_type type) {
int reloc_callback(void *cbd, const VA &addr, const reloc_type &type) {
PyObject *reloc;
PyObject *tuple;
PyObject *list = (PyObject *) cbd;
@ -1088,13 +1074,13 @@ static PyObject *pepy_parsed_get_relocations(PyObject *self, PyObject *args) {
return ret; \
}
PEPY_PARSED_GET(signature, Signature)
PEPY_PARSED_GET(machine, FileHeader.Machine)
PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections)
PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp)
PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols)
PEPY_PARSED_GET(characteristics, FileHeader.Characteristics)
PEPY_PARSED_GET(magic, OptionalMagic)
PEPY_PARSED_GET(signature, Signature);
PEPY_PARSED_GET(machine, FileHeader.Machine);
PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections);
PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp);
PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols);
PEPY_PARSED_GET(characteristics, FileHeader.Characteristics);
PEPY_PARSED_GET(magic, OptionalMagic);
/*
* This is used to get things from the optional header, which can be either
@ -1125,8 +1111,8 @@ PEPY_PARSED_GET(magic, OptionalMagic)
return ret; \
}
PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion)
PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion)
PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion);
PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion);
PEPY_PARSED_GET_OPTIONAL(codesize, SizeOfCode);
PEPY_PARSED_GET_OPTIONAL(initdatasize, SizeOfInitializedData);
PEPY_PARSED_GET_OPTIONAL(uninitdatasize, SizeOfUninitializedData);
@ -1328,7 +1314,7 @@ static PyObject *pepy_parse(PyObject *self, PyObject *args) {
static PyMethodDef pepy_methods[] = {
{"parse", pepy_parse, METH_VARARGS, "Parse PE from file."}, {NULL}};
static PyObject *pepi_module_init(void) {
PyMODINIT_FUNC PyInit_pepy(void) {
PyObject *m;
if (PyType_Ready(&pepy_parsed_type) < 0 ||
@ -1339,7 +1325,6 @@ static PyObject *pepi_module_init(void) {
PyType_Ready(&pepy_resource_type) < 0)
return NULL;
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"pepy",
@ -1351,13 +1336,8 @@ static PyObject *pepi_module_init(void) {
NULL,
NULL,
};
#endif
#if PY_MAJOR_VERSION >= 3
m = PyModule_Create(&moduledef);
#else
m = Py_InitModule3("pepy", pepy_methods, "Python interface to pe-parse.");
#endif
if (!m)
return NULL;
@ -1384,6 +1364,8 @@ static PyObject *pepi_module_init(void) {
PyModule_AddObject(m, "pepy_resource", (PyObject *) &pepy_resource_type);
PyModule_AddStringMacro(m, PEPY_VERSION);
PyModule_AddStringMacro(m, PEPARSE_VERSION);
PyModule_AddStringConstant(m, "__version__", PEPY_VERSION);
PyModule_AddIntMacro(m, MZ_MAGIC);
PyModule_AddIntMacro(m, NT_MAGIC);
@ -1447,13 +1429,3 @@ static PyObject *pepi_module_init(void) {
return m;
}
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC PyInit_pepy(void) {
return pepi_module_init();
}
#else
PyMODINIT_FUNC initpepy(void) {
pepi_module_init();
}
#endif

View File

@ -1,217 +0,0 @@
pepy
====
pepy (pronounced p-pie) is a python binding to the pe-parse parser.
Building
========
If you can build pe-parse and have a working python environment (headers and
libraries) you can build pepy.
Python 2.7
----------
1. Build pepy:
* python setup.py build
2. Install pepy:
* python setup.py install
**Building on Windows:** If you get a build error of 'Unable to find
vcvarsall.bat', you must first set the `VS90COMNTOOLS` environment variable
to the appropriate path as per
[this SO article](http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat):
> While running setup.py for package installations, Python 2.7 searches for an
> installed Visual Studio 2008. You can trick Python to use a newer Visual
> Studio by setting the correct path in VS90COMNTOOLS environment variable
> before calling setup.py.
>
> Execute the following command based on the version of Visual Studio installed:
> * Visual Studio 2010 (VS10): `SET VS90COMNTOOLS=%VS100COMNTOOLS%`
> * Visual Studio 2012 (VS11): `SET VS90COMNTOOLS=%VS110COMNTOOLS%`
> * Visual Studio 2013 (VS12): `SET VS90COMNTOOLS=%VS120COMNTOOLS%`
> * Visual Studio 2015/2017 (VS14): `SET VS90COMNTOOLS=%VS140COMNTOOLS%`
Python 3.x
----------
1. Build pepy:
* python3 setup.py build
2. Install pepy:
* python3 setup.py install
**Building on Windows:** Python 3.x is typically installed as _python.exe_
**NOT** _python3.exe_.
Using
=====
Parsed object
-------------
There are a number of objects involved in pepy. The main one is the **parsed**
object. This object is returned by the *parse* method.
```
import pepy
p = pepy.parse("/path/to/exe")
```
The **parsed** object has a number of methods:
* get_entry_point: Return the entry point address
* get_machine_as_str: Return the machine as a human readable string
* get_subsystem_as_str: Return the subsystem as a human readable string
* get_bytes: Return the first N bytes at a given address
* get_sections: Return a list of section objects
* get_imports: Return a list of import objects
* get_exports: Return a list of export objects
* get_relocations: Return a list of relocation objects
* get_resources: Return a list of resource objects
The **parsed** object has a number of attributes:
* signature
* machine
* numberofsections
* timedatestamp
* numberofsymbols
* characteristics
* magic
* majorlinkerver
* minorlinkerver
* codesize
* initdatasize
* uninitdatasize
* entrypointaddr
* baseofcode
* baseofdata
* imagebase
* sectionalignement
* filealignment
* majorosver
* minorosver
* win32ver
* imagesize
* headersize
* checksum
* subsystem
* dllcharacteristics
* stackreservesize
* stackcommitsize
* heapreservesize
* heapcommitsize
* loaderflags
* rvasandsize
Example:
```
import time
import pepy
p = pepy.parse("/path/to/exe")
print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))
ep = p.get_entry_point()
print "Entry point: 0x%x" % ep
```
The *get_sections*, *get_imports*, *get_exports*, *get_relocations* and
*get_resources* methods each return a list of objects. The type of object
depends upon the method called. *get_sections* returns a list of **section**
objects, *get_imports* returns a list of **import** objects, etc.
Section Object
--------------
The **section** object has the following attributes:
* base
* length
* virtaddr
* virtsize
* numrelocs
* numlinenums
* characteristics
* data
Import Object
-------------
The **import** object has the following attributes:
* sym
* name
* addr
Export Object
-------------
The **export** object has the following attributes:
* mod
* func
* addr
Relocation Object
-----------------
The **relocation** object has the following attributes:
* type
* addr
Resource Object
---------------
The **resource** object has the following attributes:
* type_str
* name_str
* lang_str
* type
* name
* lang
* codepage
* RVA
* size
* data
The **resource** object has the following methods:
* type_as_str
Resources are stored in a directory structure. The first three levels of the
directory are called **type**, **name** and **lang**. Each of these levels can have
either a pre-defined value or a custom string. The pre-defined values are
stored in the *type*, *name* and *lang* attributes. If a custom string is
found it will be stored in the *type_str*, *name_str* and *lang_str*
attributes. The *type_as_str* method can be used to convert a pre-defined
type value to a string representation.
The following code shows how to iterate through resources:
```
import pepy
from hashlib import md5
p = pepy.parse(sys.argv[1])
resources = p.get_resources()
print "Resources: (%i)" % len(resources)
for resource in resources:
print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
if resource.type_str:
print "\tType string: %s" % resource.type_str
else:
print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
if resource.name_str:
print "\tName string: %s" % resource.name_str
else:
print "\tName: %s" % hex(resource.name)
if resource.lang_str:
print "\tLang string: %s" % resource.lang_str
else:
print "\tLang: %s" % hex(resource.lang)
print "\tCodepage: %s" % hex(resource.codepage)
print "\tRVA: %s" % hex(resource.RVA)
print "\tSize: %s" % hex(resource.size)
```
Note that some binaries (particularly packed) may have corrupt resource entries.
In these cases you may find that len(resource.data) is 0 but resource.size is
greater than 0. The *size* attribute is the size of the data as declared by the
resource data entry.
Authors
=======
pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com)
pepy was written by Wesley Shields (wxs@atarininja.org)

View File

@ -1,68 +0,0 @@
# Copyright (c) 2013, Wesley Shields <wxs@atarininja.org>. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
from distutils.core import setup, Extension
import os
import sys
import platform
here = os.path.abspath(os.path.dirname(__file__))
SOURCE_FILES = [os.path.join(here, 'pepy.cpp'),
os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'parse.cpp')),
os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'buffer.cpp'))]
if platform.system() == 'Windows':
INCLUDE_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'include')),
os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include')),
'C:\\usr\\include']
LIBRARY_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'libs')),
'C:\\usr\\lib']
COMPILE_ARGS = ["/EHsc"]
else:
INCLUDE_DIRS = ['/usr/local/include',
'/opt/local/include',
'/usr/include',
os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include'))]
LIBRARY_DIRS = ['/usr/lib',
'/usr/local/lib']
COMPILE_ARGS = ["-std=c++11", "-g", "-O0"] # Debug only
extension_mod = Extension('pepy',
sources = SOURCE_FILES,
extra_compile_args = COMPILE_ARGS,
language='c++',
include_dirs = INCLUDE_DIRS,
library_dirs = LIBRARY_DIRS)
setup (name = 'pepy',
version = '0.1',
description = 'python bindings for pe-parse',
author = 'Wesley Shields',
author_email = 'wxs@atarininja.org',
license = 'BSD',
long_description = 'Python bindings for pe-parse',
ext_modules = [extension_mod])

View File

@ -1,99 +0,0 @@
#!/usr/bin/env python
import sys
import time
import pepy
import binascii
from hashlib import md5
try:
p = pepy.parse(sys.argv[1])
except pepy.error as e:
print e
sys.exit(1)
print "Magic: %s" % hex(p.magic)
print "Signature: %s" % hex(p.signature)
print "Machine: %s (%s)" % (hex(p.machine), p.get_machine_as_str())
print "Number of sections: %s" % p.numberofsections
print "Number of symbols: %s" % p.numberofsymbols
print "Characteristics: %s" % hex(p.characteristics)
print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))
print "Major linker version: %s" % hex(p.majorlinkerver)
print "Minor linker version: %s" % hex(p.minorlinkerver)
print "Size of code: %s" % hex(p.codesize)
print "Size of initialized data: %s" % hex(p.initdatasize)
print "Size of uninitialized data: %s" % hex(p.uninitdatasize)
print "Address of entry point: %s" % hex(p.entrypointaddr)
print "Base address of code: %s" % hex(p.baseofcode)
try:
print "Base address of data: %s" % hex(p.baseofdata)
except:
# Not available on PE32+, ignore it.
pass
print "Image base address: %s" % hex(p.imagebase)
print "Section alignment: %s" % hex(p.sectionalignement)
print "File alignment: %s" % hex(p.filealignment)
print "Major OS version: %s" % hex(p.majorosver)
print "Minor OS version: %s" % hex(p.minorosver)
print "Win32 version: %s" % hex(p.win32ver)
print "Size of image: %s" % hex(p.imagesize)
print "Size of headers: %s" % hex(p.headersize)
print "Checksum: %s" % hex(p.checksum)
print "Subsystem: %s (%s)" % (hex(p.subsystem), p.get_subsystem_as_str())
print "DLL characteristics: %s" % hex(p.dllcharacteristics)
print "Size of stack reserve: %s" % hex(p.stackreservesize)
print "Size of stack commit: %s" % hex(p.stackcommitsize)
print "Size of heap reserve: %s" % hex(p.heapreservesize)
print "Size of heap commit: %s" % hex(p.heapcommitsize)
print "Loader flags: %s" % hex(p.loaderflags)
print "Number of RVA and sizes: %s" % hex(p.rvasandsize)
ep = p.get_entry_point()
byts = p.get_bytes(ep, 8)
print "Bytes at %s: %s" % (hex(ep), ' '.join(['0x' + binascii.hexlify(b) for b in str(byts)]))
sections = p.get_sections()
print "Sections: (%i)" % len(sections)
for sect in sections:
print "[+] %s" % sect.name
print "\tBase: %s" % hex(sect.base)
print "\tLength: %s" % sect.length
print "\tVirtual address: %s" % hex(sect.virtaddr)
print "\tVirtual size: %i" % sect.virtsize
print "\tNumber of Relocations: %i" % sect.numrelocs
print "\tNumber of Line Numbers: %i" % sect.numlinenums
print "\tCharacteristics: %s" % hex(sect.characteristics)
if sect.length:
print "\tFirst 10 bytes: 0x%s" % binascii.hexlify(sect.data[:10])
print "\tMD5: %s" % md5(sect.data).hexdigest()
imports = p.get_imports()
print "Imports: (%i)" % len(imports)
for imp in imports:
print "[+] Symbol: %s (%s %s)" % (imp.sym, imp.name, hex(imp.addr))
exports = p.get_exports()
print "Exports: (%i)" % len(exports)
for exp in exports:
print "[+] Module: %s (%s %s)" % (exp.mod, exp.func, hex(exp.addr))
relocations = p.get_relocations()
print "Relocations: (%i)" % len(relocations)
for reloc in relocations:
print "[+] Type: %s (%s)" % (reloc.type, hex(reloc.addr))
resources = p.get_resources()
print "Resources: (%i)" % len(resources)
for resource in resources:
print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
if resource.type_str:
print "\tType string: %s" % resource.type_str
else:
print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
if resource.name_str:
print "\tName string: %s" % resource.name_str
else:
print "\tName: %s" % hex(resource.name)
if resource.lang_str:
print "\tLang string: %s" % resource.lang_str
else:
print "\tLang: %s" % hex(resource.lang)
print "\tCodepage: %s" % hex(resource.codepage)
print "\tRVA: %s" % hex(resource.RVA)
print "\tSize: %s" % hex(resource.size)

101
setup.py Normal file
View File

@ -0,0 +1,101 @@
# Copyright (c) 2013, Wesley Shields <wxs@atarininja.org>. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
from setuptools import setup, Extension
import os
import sys
import platform
# All paths below are built relative to the directory containing this script.
here = os.path.dirname(__file__)
pepy = os.path.join(here, "pepy")

# The README is passed to setup() as the long description. Read it with an
# explicit encoding (assumed UTF-8) instead of the platform default, which
# can fail on non-ASCII text on some systems (e.g. cp1252 on Windows).
with open(os.path.join(pepy, "README.md"), encoding="utf-8") as f:
    README = f.read()

# VERSION supplies both the package version and the PEPARSE_VERSION macro
# that is defined for the C++ sources via the compiler flags below.
with open(os.path.join(here, "VERSION"), encoding="utf-8") as f:
    VERSION = f.read().strip()

# Sources compiled on every platform; one platform-specific unicode source
# file is appended below.
SOURCE_FILES = [
    os.path.join(pepy, "pepy.cpp"),
    os.path.join(here, "pe-parser-library", "src", "parse.cpp"),
    os.path.join(here, "pe-parser-library", "src", "buffer.cpp"),
]

INCLUDE_DIRS = []
LIBRARY_DIRS = []

if platform.system() == "Windows":
    SOURCE_FILES.append(
        os.path.join(here, "pe-parser-library", "src", "unicode_winapi.cpp")
    )
    INCLUDE_DIRS += [
        os.path.abspath(os.path.join(os.path.dirname(sys.executable), "include")),
        os.path.join(here, "pe-parser-library", "include"),
        "C:\\usr\\include",
    ]
    LIBRARY_DIRS += [
        os.path.abspath(os.path.join(os.path.dirname(sys.executable), "libs")),
        "C:\\usr\\lib",
    ]
    # The nested escaped quotes make PEPARSE_VERSION expand to a string
    # literal in the compiled code.
    COMPILE_ARGS = [
        "/EHsc",
        f'/D"PEPARSE_VERSION=\\"{VERSION}\\""',
    ]
else:
    SOURCE_FILES.append(
        os.path.join(here, "pe-parser-library", "src", "unicode_codecvt.cpp")
    )
    INCLUDE_DIRS += [
        "/usr/local/include",
        "/opt/local/include",
        "/usr/include",
        os.path.join(here, "pe-parser-library", "include"),
    ]
    LIBRARY_DIRS += ["/usr/lib", "/usr/local/lib"]
    COMPILE_ARGS = ["-std=c++11", f'-DPEPARSE_VERSION="{VERSION}"']

# The single C++ extension module that makes up the pepy package.
extension_mod = Extension(
    "pepy",
    sources=SOURCE_FILES,
    extra_compile_args=COMPILE_ARGS,
    language="c++",
    include_dirs=INCLUDE_DIRS,
    library_dirs=LIBRARY_DIRS,
)

setup(
    name="pepy",
    url="https://github.com/trailofbits/pe-parse",
    python_requires=">= 3.6",
    version=VERSION,
    description="Python bindings for pe-parse",
    long_description=README,
    long_description_content_type="text/markdown",
    author="Wesley Shields",
    author_email="wxs@atarininja.org",
    license="BSD",
    ext_modules=[extension_mod],
)

BIN
test/assets/example.exe Normal file

Binary file not shown.

35
util/release Executable file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash

# release: perform the chore work required for a pe-parse/pepy release
#
# Reads ./VERSION, tags the current HEAD as v<VERSION>, then pushes the
# current branch and the new tag. Expects to be run from the directory that
# contains the VERSION file.

set -eo pipefail

# installed CMD: succeed only if `command -v CMD` resolves to a regular file.
function installed {
  # Keep the lookup result local so it does not leak into the script's globals.
  local cmd
  cmd=$(command -v "${1}")

  # The status of this test is the function's return status.
  [[ -n "${cmd}" ]] && [[ -f "${cmd}" ]]
}

# die MSG...: print MSG to stderr and exit with status 1.
function die {
  >&2 echo "Barf: ${*}"
  exit 1
}

# Fail early if we don't have the expected tools.
installed git || die "Missing dependency: git"

# Fail early if `git status -s` reports anything at all: modified, staged or
# untracked files.
[[ -n $(git status -s) ]] && die "Uncommitted or untracked changes in repo"

# Next, read the version from the VERSION file and make sure it doesn't
# already have a git tag.
[[ -f ./VERSION ]] || die "Missing VERSION file; wrong directory?"
version=v$(<./VERSION)
[[ -n $(git tag -l "${version}") ]] && die "git tag for ${version} already exists!"

# Next, craft a tag for the current HEAD. Push both the current commit and the tag.
git tag "${version}"
git push
git push origin "${version}"

echo OK