diff --git a/.clang-format b/.clang-format index 02e245f..207911d 100644 --- a/.clang-format +++ b/.clang-format @@ -2,7 +2,6 @@ AlignEscapedNewlinesLeft: true AllowShortFunctionsOnASingleLine: false BinPackArguments: false BinPackParameters: false -BreakBeforeBraces: Attach ColumnLimit: 80 IndentCaseLabels: true IndentWidth: 2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0d25599 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,134 @@ +name: CI + +on: + push: + branches: + - master + pull_request: + schedule: + # run CI every day even if no PRs/merges occur + - cron: '0 12 * * *' + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: deps + run: | + sudo apt-get update + sudo apt-get install -y clang-format-9 + + - name: lint + run: | + mkdir build && cd build + cmake .. + cmake --build . --target peparse_format + cd .. && git diff --exit-code + + pe-parse: + strategy: + matrix: + platform: ["ubuntu-latest", "macos-latest"] + build-type: ["Debug", "Release"] + build-shared: ["0", "1"] + compiler: + - { CC: "clang", CXX: "clang++" } + - { CC: "gcc", CXX: "g++" } + exclude: + - platform: macos-latest + compiler: { CC: "gcc", CXX: "g++" } + runs-on: ${{ matrix.platform }} + steps: + - uses: actions/checkout@v2 + - name: build + env: + CC: ${{ matrix.compiler.CC }} + CXX: ${{ matrix.compiler.CXX }} + run: | + mkdir build + cd build + cmake \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \ + .. + cmake --build . 
+ - name: test + run: | + ./build/dump-pe/dump-pe ./test/assets/example.exe + + pepy: + strategy: + matrix: + platform: ["ubuntu-latest", "macos-latest"] + python: + - "3.6" + - "3.7" + - "3.8" + runs-on: ${{ matrix.platform }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python }} + - name: build + run: | + python3 setup.py build + - name: sdist and install + run: | + python3 setup.py sdist + python3 -m pip install --user dist/*.tar.gz + - name: test + run: | + python3 test/test_pepy.py test/assets/example.exe + + pe-parse-windows: + strategy: + matrix: + build-arch: ["x64", "Win32"] + build-type: ["Debug", "Release"] + build-shared: ["0", "1"] + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: build + run: | + mkdir build + cd build + cmake ` + -G "Visual Studio 16 2019" ` + -A ${{ matrix.build-arch }} ` + -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} ` + .. + cmake --build . --config ${{ matrix.build-type }} + - name: install + run: | + cd build + cmake --build . --target install + - name: test + run: | + .\build\bin\dump-pe.exe .\test\assets\example.exe + + pepy-windows: + strategy: + matrix: + python: + - "3.6" + - "3.7" + - "3.8" + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python }} + - name: build + run: | + python setup.py build + - name: install + run: | + python -m pip install --user . 
+ - name: test + run: | + python test/test_pepy.py test/assets/example.exe diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..daf05c4 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +on: + push: + tags: + - 'v*' + +name: release + +jobs: + pypi: + name: upload release to PyPI + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: create release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + release_name: Release ${{ github.ref }} + draft: false + prerelease: ${{ contains(github.ref, 'pre') || contains(github.ref, 'rc') }} + - name: sdist + run: python3 setup.py sdist + - name: publish + uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + diff --git a/.gitignore b/.gitignore index 3aaefac..eeb7a75 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ -*Makefile* +Makefile cmake_install.cmake dump-prog/dump-prog *.swp -python/build +build/ .idea cmake-build-debug cmake-build-release @@ -11,8 +11,10 @@ CMakeSettings.json .vs .vscode examples_build - - +.DS_Store +dist/ +MANIFEST +*.egg-info/ *.stash *.o diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ff11b28..0000000 --- a/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -language: cpp - -os: -- linux -- osx - -compiler: -- clang -- gcc - -before_install: -- ./travis.sh "$TRAVIS_OS_NAME" initialize -script: -- ./travis.sh "$TRAVIS_OS_NAME" build - -matrix: - exclude: - - compiler: gcc - os: osx - -env: - global: - - secure: "O+BGqz4ugoVIJbQTh0dJjKRrsSVzkCYSe0WpRzEWK3l8Mw7hqX300g81TxRwTzN2zfUsROMzaeGaXWfGzYakgW59K1WIioaczxtv2MzzUQTbqzJPa+qQoP9bk/b2wJ5jcOL965/rudRju4UiIwuIgzDAMN3nAfIEJgV/2zANLIg=" - -addons: - coverity_scan: - project: - name: "trailofbits/pe-parse" - description: 
"Principled, lightweight C/C++ PE parser" - notification_email: dan@trailofbits.com - build_command: "./travis.sh linux build" - branch_pattern: master diff --git a/CMakeLists.txt b/CMakeLists.txt index dfc450d..ff1f191 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,13 @@ -cmake_minimum_required(VERSION 3.7) +cmake_minimum_required(VERSION 3.12 FATAL_ERROR) project(pe-parse) +# NOTE(ww): CMake has bad defaults for install prefixes. +# Instead of fussing over them, install everything to the build directory by default +# and let the user set CMAKE_INSTALL_PREFIX explicitly for their own needs. if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "Default install directory" FORCE) + set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}" CACHE PATH "Default install directory" FORCE) endif () -set(CMAKE_VERBOSE_MAKEFILE True) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") endif () @@ -13,19 +15,45 @@ endif () include(cmake/compilation_flags.cmake) list(APPEND GLOBAL_CXXFLAGS ${DEFAULT_CXX_FLAGS}) -option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF) +option(BUILD_SHARED_LIBS "Build Shared Libraries" ON) option(BUILD_COMMAND_LINE_TOOLS "Build Command Line Tools" ON) +option(PEPARSE_LIBRARY_WARNINGS "Log pe-parse library warnings to stderr" OFF) + if (MSVC) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif () +file(READ "${PROJECT_SOURCE_DIR}/VERSION" PEPARSE_VERSION) +string(STRIP "${PEPARSE_VERSION}" PEPARSE_VERSION) +add_compile_definitions(PEPARSE_VERSION="${PEPARSE_VERSION}") + add_subdirectory(pe-parser-library) if (BUILD_COMMAND_LINE_TOOLS) add_subdirectory(dump-pe) endif () +# `peparse_format` target. 
+file( + GLOB_RECURSE + PEPARSE_ALL_SOURCES + pe-parser-library/*.cpp + pe-parser-library/*.h + pepy/*.cpp + pepy/*.h + dump-pe/*.cpp + examples/*.cpp + examples/*.h +) +add_custom_target( + peparse_format + COMMAND clang-format -i -style=file ${PEPARSE_ALL_SOURCES} + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Auto-format the codebase with clang-format" + VERBATIM +) + message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") message(STATUS "Build Shared: ${BUILD_SHARED_LIBS} ${BUILD_SHARED_LIBS_MESSAGE}") message(STATUS "Build Command Line Tools: ${BUILD_COMMAND_LINE_TOOLS}") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c08017d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,26 @@ +Contributing to pe-parse +======================== + +Hello, and welcome to the contributing guidelines for pe-parse! + +For general building instructions, see the [README](README.md). + +For licensing information, see the [LICENSE](LICENSE) file. pe-parse includes a CLA; you will be +automatically prompted to sign it during your first PR. + +## General contribution guidelines + +* Your changes should be valid C++11 +* Your changes should work across all major compiler vendors (GCC, Clang, MSVC) and all +major operating systems (Linux, macOS, Windows) +* Your changes should be auto-formatted with `clang-format -style=file` +* Your changes should not introduce *mandatory* third-party dependencies + +## Adding features + +Feature additions to either the parsing library or `dump-pe` are welcome! 
+ +Check out the following issue labels for some contribution ideas: + +* [Enhancements](https://github.com/trailofbits/pe-parse/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) +* [Hacktoberfest](https://github.com/trailofbits/pe-parse/issues?q=is%3Aissue+is%3Aopen+label%3Ahacktoberfest) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fa15911 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM alpine:latest +ARG BUILD_TYPE=Release + +LABEL name "pe-parse" +LABEL src "https://github.com/trailofbits/pe-parse" +LABEL creator "Trail of Bits" +LABEL dockerfile_maintenance "William Woodruff " +LABEL desc "Principled, lightweight C/C++ PE parser" + +RUN apk add --no-cache cmake icu-dev clang build-base + +COPY . /app/pe-parse +WORKDIR /app/pe-parse +ENV CC=clang CXX=clang++ +RUN mkdir build && \ + cd build && \ + cmake -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DCMAKE_INSTALL_PREFIX=/usr .. && \ + cmake --build . && \ + cmake --build . --target install + +ENTRYPOINT [ "/usr/bin/dump-pe" ] +CMD ["--help"] diff --git a/LICENSE.txt b/LICENSE similarity index 100% rename from LICENSE.txt rename to LICENSE diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4edcb82 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include VERSION +include pepy/README.md +include pe-parser-library/include/pe-parse/*.h diff --git a/README.md b/README.md index 42b2f63..3ba8351 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,15 @@ pe-parse -========================================= +======== -[![Build Status](https://travis-ci.org/trailofbits/pe-parse.svg?branch=master)](https://travis-ci.org/trailofbits/pe-parse) -[![Coverity Scan Build Status](https://scan.coverity.com/projects/3671/badge.svg)](https://scan.coverity.com/projects/3671) +[![Build Status](https://img.shields.io/github/workflow/status/trailofbits/pe-parse/CI/master)](https://github.com/trailofbits/pe-parse/actions?query=workflow%3ACI) +[![LGTM Total 
alerts](https://img.shields.io/lgtm/alerts/g/trailofbits/pe-parse.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/trailofbits/pe-parse/alerts/) -pe-parse is a principled, lightweight parser for windows portable executable files. It was created to assist in compiled program analysis, potentially of programs of unknown origins. This means that it should be resistant to malformed or maliciously crafted PE files, and it should support questions that analysis software would ask of an executable program container. For example, listing relocations, describing imports and exports, and supporting byte reads from virtual addresses as well as file offsets. +pe-parse is a principled, lightweight parser for Windows portable executable files. +It was created to assist in compiled program analysis, potentially of programs of unknown origins. +This means that it should be resistant to malformed or maliciously crafted PE files, and it should +support questions that analysis software would ask of an executable program container. +For example, listing relocations, describing imports and exports, and supporting byte reads from +virtual addresses as well as file offsets. pe-parse supports these use cases via a minimal API that provides methods for * Opening and closing a PE file @@ -16,21 +21,44 @@ pe-parse supports these use cases via a minimal API that provides methods for * Reading bytes from specified virtual addresses * Retrieving the program entry point -The interface is defined in `parser-library/parse.h`. The program in `dump-prog/dump.cpp` is an example of using the parser-library API to dump information about a PE file. +The interface is defined in `parser-library/parse.h`. -Internally, the parser-library uses a bounded buffer abstraction to access information stored in the PE file. This should help in constructing a sane parser that allows for detection of the use of bogus values in the PE that would result in out of bounds accesses of the input buffer. 
Once data is read from the file it is sanitized and placed in C++ STL containers of internal types. +The program in `dump-pe/main.cpp` is an example of using the parser-library API to dump +information about a PE file. + +Internally, the parser-library uses a bounded buffer abstraction to access information stored in +the PE file. This should help in constructing a sane parser that allows for detection of the use +of bogus values in the PE that would result in out of bounds accesses of the input buffer. +Once data is read from the file it is sanitized and placed in C++ STL containers of internal types. + +## Installation + +pe-parse can be installed via [vcpkg](https://github.com/microsoft/vcpkg): + +```bash +$ vcpkg install pe-parse +``` + +pe-parse includes Python bindings via `pepy`, which can be installed via `pip`: + +```bash +$ pip3 install pepy +``` + +More information about `pepy` can be found in its [README](./pepy/README.md). + +## Dependencies -Dependencies -======== ### CMake * Debian/Ubuntu: `sudo apt-get install cmake` * RedHat/Fedora: `sudo yum install cmake` * OSX: `brew install cmake` * Windows: Download the installer from the [CMake page](https://cmake.org/download/) -Building -======== +## Building + ### Generic instructions + ``` git clone https://github.com/trailofbits/pe-parse.git cd pe-parse @@ -39,37 +67,40 @@ mkdir build cd build cmake -DCMAKE_BUILD_TYPE=Release .. -cmake --build . --config Release +cmake --build . # optional -cmake --build . --config Release --target install +cmake --build . --target install ``` -### Notes about Windows +### Windows-specific -If you are building on Windows with Visual Studio, the generator option can be used to select the compiler version and the output architecture: +VS 2017 and VS 2019 are supported. ``` # Compile 64-bit binaries with Visual Studio 2017 -cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release .. +cmake -G "Visual Studio 15 2017 Win64" .. 
-# Compile 32-bit binaries with Visual Studio 2017 -cmake -G "Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=Release .. +# Or, with VS 2019, use the -A flag for architecture +cmake -G "Visual Studio 16 2019" -A x64 .. + +# Pass the build type at build time +cmake --build . --config Release ``` -Using the library -======= +## Using the library + Once the library is installed, linking to it is easy! Add the following lines in your CMake project: ``` -find_package(peparse REQUIRED) +find_package(pe-parse REQUIRED) -target_link_libraries(your_target_name ${PEPARSE_LIBRARIES}) -target_include_directories(your_target_name PRIVATE ${PEPARSE_INCLUDE_DIRS}) +target_link_libraries(your_target_name PRIVATE pe-parse::pe-parse) ``` -You can see a full example in the examples/peaddrconv folder. +You can see a full example in the [examples/peaddrconv](examples/peaddrconv) folder. -Authors -======= -pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant contributions from [Wesley Shields](https://github.com/wxsBSD). +## Authors + +pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant +contributions from [Wesley Shields](https://github.com/wxsBSD). 
diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..26aaba0 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.2.0 diff --git a/cmake/compilation_flags.cmake b/cmake/compilation_flags.cmake index 8791cbb..140a669 100644 --- a/cmake/compilation_flags.cmake +++ b/cmake/compilation_flags.cmake @@ -11,6 +11,7 @@ if (MSVC) else () set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if (NOT MINGW) @@ -31,10 +32,8 @@ else () endif () if (CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "This is a debug build; enabling -Weverything...") - list(APPEND DEFAULT_CXX_FLAGS - -Weverything -Wno-c++98-compat -Wno-missing-prototypes + -Wno-c++98-compat -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-global-constructors -Wno-exit-time-destructors -Wno-padded -Wno-error ) diff --git a/dump-pe/CMakeLists.txt b/dump-pe/CMakeLists.txt index 1b53afd..bd40723 100644 --- a/dump-pe/CMakeLists.txt +++ b/dump-pe/CMakeLists.txt @@ -1,8 +1,8 @@ -cmake_minimum_required(VERSION 3.7) +cmake_minimum_required(VERSION 3.12 FATAL_ERROR) project(dump-pe) add_executable(${PROJECT_NAME} main.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE pe-parser-library) +target_link_libraries(${PROJECT_NAME} PRIVATE pe-parse) target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS}) -install(TARGETS ${PROJECT_NAME} DESTINATION "bin") +install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION "bin") diff --git a/dump-pe/main.cpp b/dump-pe/main.cpp index 9560089..f945c85 100644 --- a/dump-pe/main.cpp +++ b/dump-pe/main.cpp @@ -22,16 +22,21 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include -#include -#include #include +#include +#include +#include -#include +#include + +#include "vendor/argh.h" using namespace peparse; -int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) { +int printExps(void *N, + const VA &funcAddr, + const std::string &mod, + const std::string &func) { static_cast(N); auto address = static_cast(funcAddr); @@ -47,7 +52,7 @@ int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) { } int printImports(void *N, - VA impAddr, + const VA &impAddr, const std::string &modName, const std::string &symName) { static_cast(N); @@ -59,33 +64,33 @@ int printImports(void *N, return 0; } -int printRelocs(void *N, VA relocAddr, reloc_type type) { +int printRelocs(void *N, const VA &relocAddr, const reloc_type &type) { static_cast(N); std::cout << "TYPE: "; switch (type) { - case ABSOLUTE: + case RELOC_ABSOLUTE: std::cout << "ABSOLUTE"; break; - case HIGH: + case RELOC_HIGH: std::cout << "HIGH"; break; - case LOW: + case RELOC_LOW: std::cout << "LOW"; break; - case HIGHLOW: + case RELOC_HIGHLOW: std::cout << "HIGHLOW"; break; - case HIGHADJ: + case RELOC_HIGHADJ: std::cout << "HIGHADJ"; break; - case MIPS_JMPADDR: + case RELOC_MIPS_JMPADDR: std::cout << "MIPS_JMPADDR"; break; - case MIPS_JMPADDR16: + case RELOC_MIPS_JMPADDR16: std::cout << "MIPS_JMPADD16"; break; - case DIR64: + case RELOC_DIR64: std::cout << "DIR64"; break; default: @@ -99,12 +104,12 @@ int printRelocs(void *N, VA relocAddr, reloc_type type) { } int printSymbols(void *N, - std::string &strName, - uint32_t &value, - int16_t §ionNumber, - uint16_t &type, - uint8_t &storageClass, - uint8_t &numberOfAuxSymbols) { + const std::string &strName, + const uint32_t &value, + const int16_t §ionNumber, + const uint16_t &type, + const uint8_t &storageClass, + const uint8_t &numberOfAuxSymbols) { static_cast(N); std::cout << "Symbol Name: " << strName << "\n"; @@ -227,7 +232,19 @@ int printSymbols(void *N, return 0; } -int printRsrc(void *N, 
resource r) { +int printRich(void *N, const rich_entry &r) { + static_cast(N); + std::cout << std::dec; + std::cout << std::setw(10) << "ProdId:" << std::setw(7) << r.ProductId; + std::cout << std::setw(10) << "Build:" << std::setw(7) << r.BuildNumber; + std::cout << std::setw(10) << "Name:" << std::setw(40) + << GetRichProductName(r.BuildNumber) << " " + << GetRichObjectType(r.ProductId); + std::cout << std::setw(10) << "Count:" << std::setw(7) << r.Count << "\n"; + return 0; +} + +int printRsrc(void *N, const resource &r) { static_cast(N); if (r.type_str.length()) @@ -252,10 +269,10 @@ int printRsrc(void *N, resource r) { } int printSecs(void *N, - VA secBase, - std::string &secName, - image_section_header s, - bounded_buffer *data) { + const VA &secBase, + const std::string &secName, + const image_section_header &s, + const bounded_buffer *data) { static_cast(N); static_cast(s); @@ -264,114 +281,173 @@ int printSecs(void *N, if (data) std::cout << "Sec Size: " << std::dec << data->bufLen << "\n"; else - std::cout << "Sec Size: 0" << "\n"; + std::cout << "Sec Size: 0" + << "\n"; return 0; } -#define DUMP_FIELD(x) \ - std::cout << "" #x << ": 0x"; \ - std::cout << std::hex << static_cast(p->peHeader.nt.x) \ - << "\n"; -#define DUMP_DEC_FIELD(x) \ - std::cout << "" #x << ": "; \ - std::cout << std::dec << static_cast(p->peHeader.nt.x) \ - << "\n"; +#define DUMP_FIELD(x) \ + std::cout << "" #x << ": 0x"; \ + std::cout << std::hex << static_cast(p->peHeader.x) << "\n"; +#define DUMP_DEC_FIELD(x) \ + std::cout << "" #x << ": "; \ + std::cout << std::dec << static_cast(p->peHeader.x) << "\n"; +#define DUMP_BOOL_FIELD(x) \ + std::cout << "" #x << ": "; \ + std::cout << std::boolalpha << static_cast(p->peHeader.x) << "\n"; int main(int argc, char *argv[]) { - if (argc != 2 || (argc == 2 && std::strcmp(argv[1], "--help") == 0)) { + + argh::parser cmdl(argv); + + if (cmdl[{"-h", "--help"}] || argc <= 1) { std::cout << "dump-pe utility from Trail of Bits\n"; std::cout << 
"Repository: https://github.com/trailofbits/pe-parse\n\n"; std::cout << "Usage:\n\tdump-pe /path/to/executable.exe\n"; + return 0; + } else if (cmdl[{"-v", "--version"}]) { + std::cout << "dump-pe (pe-parse) version " << PEPARSE_VERSION << "\n"; + return 0; + } + + parsed_pe *p = ParsePEFromFile(cmdl[1].c_str()); + + if (p == nullptr) { + std::cout << "Error: " << GetPEErr() << " (" << GetPEErrString() << ")" + << "\n"; + std::cout << "Location: " << GetPEErrLoc() << "\n"; return 1; } - parsed_pe *p = ParsePEFromFile(argv[1]); - if (p != NULL) { + // Print DOS header + DUMP_FIELD(dos.e_magic); + DUMP_FIELD(dos.e_cp); + DUMP_FIELD(dos.e_crlc); + DUMP_FIELD(dos.e_cparhdr); + DUMP_FIELD(dos.e_minalloc); + DUMP_FIELD(dos.e_maxalloc); + DUMP_FIELD(dos.e_ss); + DUMP_FIELD(dos.e_sp); + DUMP_FIELD(dos.e_csum); + DUMP_FIELD(dos.e_ip); + DUMP_FIELD(dos.e_cs); + DUMP_FIELD(dos.e_lfarlc); + DUMP_FIELD(dos.e_ovno); + DUMP_FIELD(dos.e_res[0]); + DUMP_FIELD(dos.e_res[1]); + DUMP_FIELD(dos.e_res[2]); + DUMP_FIELD(dos.e_res[3]); + DUMP_FIELD(dos.e_oemid); + DUMP_FIELD(dos.e_oeminfo); + DUMP_FIELD(dos.e_res2[0]); + DUMP_FIELD(dos.e_res2[1]); + DUMP_FIELD(dos.e_res2[2]); + DUMP_FIELD(dos.e_res2[3]); + DUMP_FIELD(dos.e_res2[4]); + DUMP_FIELD(dos.e_res2[5]); + DUMP_FIELD(dos.e_res2[6]); + DUMP_FIELD(dos.e_res2[7]); + DUMP_FIELD(dos.e_res2[8]); + DUMP_FIELD(dos.e_res2[9]); + DUMP_FIELD(dos.e_lfanew); + // Print Rich header info + DUMP_BOOL_FIELD(rich.isPresent); + if (p->peHeader.rich.isPresent) { + DUMP_FIELD(rich.DecryptionKey); + DUMP_FIELD(rich.Checksum); + DUMP_BOOL_FIELD(rich.isValid); + IterRich(p, printRich, NULL); + } // print out some things - DUMP_FIELD(Signature); - DUMP_FIELD(FileHeader.Machine); - DUMP_FIELD(FileHeader.NumberOfSections); - DUMP_DEC_FIELD(FileHeader.TimeDateStamp); - DUMP_FIELD(FileHeader.PointerToSymbolTable); - DUMP_DEC_FIELD(FileHeader.NumberOfSymbols); - DUMP_FIELD(FileHeader.SizeOfOptionalHeader); - DUMP_FIELD(FileHeader.Characteristics); + 
DUMP_FIELD(nt.Signature); + DUMP_FIELD(nt.FileHeader.Machine); + DUMP_FIELD(nt.FileHeader.NumberOfSections); + DUMP_DEC_FIELD(nt.FileHeader.TimeDateStamp); + DUMP_FIELD(nt.FileHeader.PointerToSymbolTable); + DUMP_DEC_FIELD(nt.FileHeader.NumberOfSymbols); + DUMP_FIELD(nt.FileHeader.SizeOfOptionalHeader); + DUMP_FIELD(nt.FileHeader.Characteristics); if (p->peHeader.nt.OptionalMagic == NT_OPTIONAL_32_MAGIC) { - DUMP_FIELD(OptionalHeader.Magic); - DUMP_DEC_FIELD(OptionalHeader.MajorLinkerVersion); - DUMP_DEC_FIELD(OptionalHeader.MinorLinkerVersion); - DUMP_FIELD(OptionalHeader.SizeOfCode); - DUMP_FIELD(OptionalHeader.SizeOfInitializedData); - DUMP_FIELD(OptionalHeader.SizeOfUninitializedData); - DUMP_FIELD(OptionalHeader.AddressOfEntryPoint); - DUMP_FIELD(OptionalHeader.BaseOfCode); - DUMP_FIELD(OptionalHeader.BaseOfData); - DUMP_FIELD(OptionalHeader.ImageBase); - DUMP_FIELD(OptionalHeader.SectionAlignment); - DUMP_FIELD(OptionalHeader.FileAlignment); - DUMP_DEC_FIELD(OptionalHeader.MajorOperatingSystemVersion); - DUMP_DEC_FIELD(OptionalHeader.MinorOperatingSystemVersion); - DUMP_DEC_FIELD(OptionalHeader.Win32VersionValue); - DUMP_FIELD(OptionalHeader.SizeOfImage); - DUMP_FIELD(OptionalHeader.SizeOfHeaders); - DUMP_FIELD(OptionalHeader.CheckSum); - DUMP_FIELD(OptionalHeader.Subsystem); - DUMP_FIELD(OptionalHeader.DllCharacteristics); - DUMP_FIELD(OptionalHeader.SizeOfStackReserve); - DUMP_FIELD(OptionalHeader.SizeOfStackCommit); - DUMP_FIELD(OptionalHeader.SizeOfHeapReserve); - DUMP_FIELD(OptionalHeader.SizeOfHeapCommit); - DUMP_FIELD(OptionalHeader.LoaderFlags); - DUMP_DEC_FIELD(OptionalHeader.NumberOfRvaAndSizes); + DUMP_FIELD(nt.OptionalHeader.Magic); + DUMP_DEC_FIELD(nt.OptionalHeader.MajorLinkerVersion); + DUMP_DEC_FIELD(nt.OptionalHeader.MinorLinkerVersion); + DUMP_FIELD(nt.OptionalHeader.SizeOfCode); + DUMP_FIELD(nt.OptionalHeader.SizeOfInitializedData); + DUMP_FIELD(nt.OptionalHeader.SizeOfUninitializedData); + DUMP_FIELD(nt.OptionalHeader.AddressOfEntryPoint); 
+ DUMP_FIELD(nt.OptionalHeader.BaseOfCode); + DUMP_FIELD(nt.OptionalHeader.BaseOfData); + DUMP_FIELD(nt.OptionalHeader.ImageBase); + DUMP_FIELD(nt.OptionalHeader.SectionAlignment); + DUMP_FIELD(nt.OptionalHeader.FileAlignment); + DUMP_DEC_FIELD(nt.OptionalHeader.MajorOperatingSystemVersion); + DUMP_DEC_FIELD(nt.OptionalHeader.MinorOperatingSystemVersion); + DUMP_DEC_FIELD(nt.OptionalHeader.Win32VersionValue); + DUMP_FIELD(nt.OptionalHeader.SizeOfImage); + DUMP_FIELD(nt.OptionalHeader.SizeOfHeaders); + DUMP_FIELD(nt.OptionalHeader.CheckSum); + DUMP_FIELD(nt.OptionalHeader.Subsystem); + DUMP_FIELD(nt.OptionalHeader.DllCharacteristics); + DUMP_FIELD(nt.OptionalHeader.SizeOfStackReserve); + DUMP_FIELD(nt.OptionalHeader.SizeOfStackCommit); + DUMP_FIELD(nt.OptionalHeader.SizeOfHeapReserve); + DUMP_FIELD(nt.OptionalHeader.SizeOfHeapCommit); + DUMP_FIELD(nt.OptionalHeader.LoaderFlags); + DUMP_DEC_FIELD(nt.OptionalHeader.NumberOfRvaAndSizes); } else { - DUMP_FIELD(OptionalHeader64.Magic); - DUMP_DEC_FIELD(OptionalHeader64.MajorLinkerVersion); - DUMP_DEC_FIELD(OptionalHeader64.MinorLinkerVersion); - DUMP_FIELD(OptionalHeader64.SizeOfCode); - DUMP_FIELD(OptionalHeader64.SizeOfInitializedData); - DUMP_FIELD(OptionalHeader64.SizeOfUninitializedData); - DUMP_FIELD(OptionalHeader64.AddressOfEntryPoint); - DUMP_FIELD(OptionalHeader64.BaseOfCode); - DUMP_FIELD(OptionalHeader64.ImageBase); - DUMP_FIELD(OptionalHeader64.SectionAlignment); - DUMP_FIELD(OptionalHeader64.FileAlignment); - DUMP_DEC_FIELD(OptionalHeader64.MajorOperatingSystemVersion); - DUMP_DEC_FIELD(OptionalHeader64.MinorOperatingSystemVersion); - DUMP_DEC_FIELD(OptionalHeader64.Win32VersionValue); - DUMP_FIELD(OptionalHeader64.SizeOfImage); - DUMP_FIELD(OptionalHeader64.SizeOfHeaders); - DUMP_FIELD(OptionalHeader64.CheckSum); - DUMP_FIELD(OptionalHeader64.Subsystem); - DUMP_FIELD(OptionalHeader64.DllCharacteristics); - DUMP_FIELD(OptionalHeader64.SizeOfStackReserve); - DUMP_FIELD(OptionalHeader64.SizeOfStackCommit); - 
DUMP_FIELD(OptionalHeader64.SizeOfHeapReserve); - DUMP_FIELD(OptionalHeader64.SizeOfHeapCommit); - DUMP_FIELD(OptionalHeader64.LoaderFlags); - DUMP_DEC_FIELD(OptionalHeader64.NumberOfRvaAndSizes); + DUMP_FIELD(nt.OptionalHeader64.Magic); + DUMP_DEC_FIELD(nt.OptionalHeader64.MajorLinkerVersion); + DUMP_DEC_FIELD(nt.OptionalHeader64.MinorLinkerVersion); + DUMP_FIELD(nt.OptionalHeader64.SizeOfCode); + DUMP_FIELD(nt.OptionalHeader64.SizeOfInitializedData); + DUMP_FIELD(nt.OptionalHeader64.SizeOfUninitializedData); + DUMP_FIELD(nt.OptionalHeader64.AddressOfEntryPoint); + DUMP_FIELD(nt.OptionalHeader64.BaseOfCode); + DUMP_FIELD(nt.OptionalHeader64.ImageBase); + DUMP_FIELD(nt.OptionalHeader64.SectionAlignment); + DUMP_FIELD(nt.OptionalHeader64.FileAlignment); + DUMP_DEC_FIELD(nt.OptionalHeader64.MajorOperatingSystemVersion); + DUMP_DEC_FIELD(nt.OptionalHeader64.MinorOperatingSystemVersion); + DUMP_DEC_FIELD(nt.OptionalHeader64.Win32VersionValue); + DUMP_FIELD(nt.OptionalHeader64.SizeOfImage); + DUMP_FIELD(nt.OptionalHeader64.SizeOfHeaders); + DUMP_FIELD(nt.OptionalHeader64.CheckSum); + DUMP_FIELD(nt.OptionalHeader64.Subsystem); + DUMP_FIELD(nt.OptionalHeader64.DllCharacteristics); + DUMP_FIELD(nt.OptionalHeader64.SizeOfStackReserve); + DUMP_FIELD(nt.OptionalHeader64.SizeOfStackCommit); + DUMP_FIELD(nt.OptionalHeader64.SizeOfHeapReserve); + DUMP_FIELD(nt.OptionalHeader64.SizeOfHeapCommit); + DUMP_FIELD(nt.OptionalHeader64.LoaderFlags); + DUMP_DEC_FIELD(nt.OptionalHeader64.NumberOfRvaAndSizes); } #undef DUMP_FIELD #undef DUMP_DEC_FIELD - std::cout << "Imports: " << "\n"; + std::cout << "Imports: " + << "\n"; IterImpVAString(p, printImports, NULL); - std::cout << "Relocations: " << "\n"; + std::cout << "Relocations: " + << "\n"; IterRelocs(p, printRelocs, NULL); - std::cout << "Symbols (symbol table): " << "\n"; + std::cout << "Symbols (symbol table): " + << "\n"; IterSymbols(p, printSymbols, NULL); - std::cout << "Sections: " << "\n"; + std::cout << "Sections: " + << "\n"; 
IterSec(p, printSecs, NULL); - std::cout << "Exports: " << "\n"; + std::cout << "Exports: " + << "\n"; IterExpVA(p, printExps, NULL); // read the first 8 bytes from the entry point and print them VA entryPoint; if (GetEntryPoint(p, entryPoint)) { std::cout << "First 8 bytes from entry point (0x"; - - std::cout << std::hex << entryPoint << "):" << "\n"; + std::cout << std::hex << entryPoint << "):" + << "\n"; for (std::size_t i = 0; i < 8; i++) { std::uint8_t b; if (!ReadByteAtVA(p, i + entryPoint, b)) { @@ -384,14 +460,12 @@ int main(int argc, char *argv[]) { std::cout << "\n"; } - std::cout << "Resources: " << "\n"; - IterRsrc(p, printRsrc, NULL); - DestructParsedPE(p); - } else { - std::cout << "Error: " << GetPEErr() << " (" << GetPEErrString() << ")" + std::cout << "Resources: " << "\n"; - std::cout << "Location: " << GetPEErrLoc() << "\n"; - } + IterRsrc(p, printRsrc, NULL); - return 0; + DestructParsedPE(p); + + return 0; + } } diff --git a/dump-pe/vendor/argh.h b/dump-pe/vendor/argh.h new file mode 100644 index 0000000..7e03a18 --- /dev/null +++ b/dump-pe/vendor/argh.h @@ -0,0 +1,463 @@ +/* + * Copyright (c) 2016, Adi Shavit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of nor the names of its contributors may be used to + * endorse or promote products derived from this software without specific + * prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace argh +{ + // Terminology: + // A command line is composed of 2 types of args: + // 1. Positional args, i.e. free standing values + // 2. Options: args beginning with '-'. We identify two kinds: + // 2.1: Flags: boolean options => (exist ? true : false) + // 2.2: Parameters: a name followed by a non-option value + +#if !defined(__GNUC__) || (__GNUC__ >= 5) + using string_stream = std::istringstream; +#else + // Until GCC 5, istringstream did not have a move constructor. + // stringstream_proxy is used instead, as a workaround. + class stringstream_proxy + { + public: + stringstream_proxy() = default; + + // Construct with a value. + stringstream_proxy(std::string const& value) : + stream_(value) + {} + + // Copy constructor. + stringstream_proxy(const stringstream_proxy& other) : + stream_(other.stream_.str()) + { + stream_.setstate(other.stream_.rdstate()); + } + + void setstate(std::ios_base::iostate state) { stream_.setstate(state); } + + // Stream out the value of the parameter. 
+ // If the conversion was not possible, the stream will enter the fail state, + // and operator bool will return false. + template + stringstream_proxy& operator >> (T& thing) + { + stream_ >> thing; + return *this; + } + + + // Get the string value. + std::string str() const { return stream_.str(); } + + std::stringbuf* rdbuf() const { return stream_.rdbuf(); } + + // Check the state of the stream. + // False when the most recent stream operation failed + explicit operator bool() const { return !!stream_; } + + ~stringstream_proxy() = default; + private: + std::istringstream stream_; + }; + using string_stream = stringstream_proxy; +#endif + + class parser + { + public: + enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0, + PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1, + NO_SPLIT_ON_EQUALSIGN = 1 << 2, + SINGLE_DASH_IS_MULTIFLAG = 1 << 3, + }; + + parser() = default; + + parser(std::initializer_list pre_reg_names) + { add_params(pre_reg_names); } + + parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION) + { parse(argv, mode); } + + parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION) + { parse(argc, argv, mode); } + + void add_param(std::string const& name); + void add_params(std::initializer_list init_list); + + void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION); + void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION); + + std::multiset const& flags() const { return flags_; } + std::map const& params() const { return params_; } + std::vector const& pos_args() const { return pos_args_; } + + // begin() and end() for using range-for over positional args. 
+ std::vector::const_iterator begin() const { return pos_args_.cbegin(); } + std::vector::const_iterator end() const { return pos_args_.cend(); } + size_t size() const { return pos_args_.size(); } + + ////////////////////////////////////////////////////////////////////////// + // Accessors + + // flag (boolean) accessors: return true if the flag appeared, otherwise false. + bool operator[](std::string const& name) const; + + // multiple flag (boolean) accessors: return true if at least one of the flag appeared, otherwise false. + bool operator[](std::initializer_list init_list) const; + + // returns positional arg string by order. Like argv[] but without the options + std::string const& operator[](size_t ind) const; + + // returns a std::istream that can be used to convert a positional arg to a typed value. + string_stream operator()(size_t ind) const; + + // same as above, but with a default value in case the arg is missing (index out of range). + template + string_stream operator()(size_t ind, T&& def_val) const; + + // parameter accessors, give a name get an std::istream that can be used to convert to a typed value. + // call .str() on result to get as string + string_stream operator()(std::string const& name) const; + + // accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value. + // call .str() on result to get as string + // returns the first value in the list to be found. + string_stream operator()(std::initializer_list init_list) const; + + // same as above, but with a default value in case the param was missing. + // Non-string def_val types must have an operator<<() (output stream operator) + // If T only has an input stream operator, pass the string version of the type as in "3" instead of 3. + template + string_stream operator()(std::string const& name, T&& def_val) const; + + // same as above but for a list of names. returns the first value to be found. 
+ template + string_stream operator()(std::initializer_list init_list, T&& def_val) const; + + private: + string_stream bad_stream() const; + std::string trim_leading_dashes(std::string const& name) const; + bool is_number(std::string const& arg) const; + bool is_option(std::string const& arg) const; + bool got_flag(std::string const& name) const; + bool is_param(std::string const& name) const; + + private: + std::vector args_; + std::map params_; + std::vector pos_args_; + std::multiset flags_; + std::set registeredParams_; + std::string empty_; + }; + + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::parse(const char * const argv[], int mode) + { + int argc = 0; + for (auto argvp = argv; *argvp; ++argc, ++argvp); + parse(argc, argv, mode); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/) + { + // convert to strings + args_.resize(static_cast(argc)); + std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg; }); + + // parse line + for (auto i = 0u; i < args_.size(); ++i) + { + if (!is_option(args_[i])) + { + pos_args_.emplace_back(args_[i]); + continue; + } + + auto name = trim_leading_dashes(args_[i]); + + if (!(mode & NO_SPLIT_ON_EQUALSIGN)) + { + auto equalPos = name.find('='); + if (equalPos != std::string::npos) + { + params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) }); + continue; + } + } + + // if the option is unregistered and should be a multi-flag + if (1 == (args_[i].size() - name.size()) && // single dash + argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode + !is_param(name)) // unregistered + { + std::string keep_param; + + if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param + { + keep_param += name.back(); + name.resize(name.size() - 1); + } + + for (auto 
const& c : name) + { + flags_.emplace(std::string{ c }); + } + + if (!keep_param.empty()) + { + name = keep_param; + } + else + { + continue; // do not consider other options for this arg + } + } + + // any potential option will get as its value the next arg, unless that arg is an option too + // in that case it will be determined a flag. + if (i == args_.size() - 1 || is_option(args_[i + 1])) + { + flags_.emplace(name); + continue; + } + + // if 'name' is a pre-registered option, then the next arg cannot be a free parameter to it is skipped + // otherwise we have 2 modes: + // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag. + // The following value (the next arg) will be a free parameter. + // + // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg + // will be the value of that option. + + assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION) + || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION)); + + bool preferParam = mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION; + + if (is_param(name) || preferParam) + { + params_.insert({ name, args_[i + 1] }); + ++i; // skip next value, it is not a free parameter + continue; + } + else + { + flags_.emplace(name); + } + }; + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::bad_stream() const + { + string_stream bad; + bad.setstate(std::ios_base::failbit); + return bad; + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::is_number(std::string const& arg) const + { + // inefficient but simple way to determine if a string is a number (which can start with a '-') + std::istringstream istr(arg); + double number; + istr >> number; + return !(istr.fail() || istr.bad()); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::is_option(std::string const& arg) const + { + 
assert(0 != arg.size()); + if (is_number(arg)) + return false; + return '-' == arg[0]; + } + + ////////////////////////////////////////////////////////////////////////// + + inline std::string parser::trim_leading_dashes(std::string const& name) const + { + auto pos = name.find_first_not_of('-'); + return std::string::npos != pos ? name.substr(pos) : name; + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool argh::parser::got_flag(std::string const& name) const + { + return flags_.end() != flags_.find(trim_leading_dashes(name)); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool argh::parser::is_param(std::string const& name) const + { + return registeredParams_.count(name); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::operator[](std::string const& name) const + { + return got_flag(name); + } + + ////////////////////////////////////////////////////////////////////////// + + inline bool parser::operator[](std::initializer_list init_list) const + { + return std::any_of(init_list.begin(), init_list.end(), [&](char const* const name) { return got_flag(name); }); + } + + ////////////////////////////////////////////////////////////////////////// + + inline std::string const& parser::operator[](size_t ind) const + { + if (ind < pos_args_.size()) + return pos_args_[ind]; + return empty_; + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(std::string const& name) const + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + return bad_stream(); + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(std::initializer_list init_list) const + { + for (auto& name : init_list) + { + auto optIt = 
params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + } + return bad_stream(); + } + + ////////////////////////////////////////////////////////////////////////// + + template + string_stream parser::operator()(std::string const& name, T&& def_val) const + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); // use default + } + + ////////////////////////////////////////////////////////////////////////// + + // same as above but for a list of names. returns the first value to be found. + template + string_stream parser::operator()(std::initializer_list init_list, T&& def_val) const + { + for (auto& name : init_list) + { + auto optIt = params_.find(trim_leading_dashes(name)); + if (params_.end() != optIt) + return string_stream(optIt->second); + } + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); // use default + } + + ////////////////////////////////////////////////////////////////////////// + + inline string_stream parser::operator()(size_t ind) const + { + if (pos_args_.size() <= ind) + return bad_stream(); + + return string_stream(pos_args_[ind]); + } + + ////////////////////////////////////////////////////////////////////////// + + template + string_stream parser::operator()(size_t ind, T&& def_val) const + { + if (pos_args_.size() <= ind) + { + std::ostringstream ostr; + ostr.precision(std::numeric_limits::max_digits10); + ostr << def_val; + return string_stream(ostr.str()); + } + + return string_stream(pos_args_[ind]); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_param(std::string const& name) + { + 
registeredParams_.insert(trim_leading_dashes(name)); + } + + ////////////////////////////////////////////////////////////////////////// + + inline void parser::add_params(std::initializer_list init_list) + { + for (auto& name : init_list) + registeredParams_.insert(trim_leading_dashes(name)); + } +} diff --git a/dump-prog/meson.build b/dump-prog/meson.build deleted file mode 100644 index 2625ea7..0000000 --- a/dump-prog/meson.build +++ /dev/null @@ -1,7 +0,0 @@ -dump_prog = executable( - 'dump-prog', - 'dump.cpp', - include_directories : [ incdirs ], - install : true, - install_dir : join_paths(get_option('datadir'), 'pe-parse/examples'), - link_with : pe_parser_library) diff --git a/examples/peaddrconv/CMakeLists.txt b/examples/peaddrconv/CMakeLists.txt index 3036b9d..fce13e1 100644 --- a/examples/peaddrconv/CMakeLists.txt +++ b/examples/peaddrconv/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12 FATAL_ERROR) project(peaddrconv) if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) @@ -35,11 +35,10 @@ else () endif () endif () -find_package(peparse REQUIRED) +find_package(pe-parse REQUIRED) add_executable(${PROJECT_NAME} main.cpp) -target_link_libraries(${PROJECT_NAME} ${PEPARSE_LIBRARIES}) -target_include_directories(${PROJECT_NAME} PRIVATE ${PEPARSE_INCLUDE_DIR}) +target_link_libraries(${PROJECT_NAME} pe-parse::pe-parse) target_compile_options(${PROJECT_NAME} PRIVATE ${PEADDRCONV_CXXFLAGS}) install(TARGETS ${PROJECT_NAME} DESTINATION "bin") diff --git a/examples/peaddrconv/main.cpp b/examples/peaddrconv/main.cpp index f671110..67d727c 100644 --- a/examples/peaddrconv/main.cpp +++ b/examples/peaddrconv/main.cpp @@ -1,12 +1,12 @@ +#include #include #include #include -#include #include #include -#include +#include using ParsedPeRef = std::unique_ptr; @@ -243,7 +243,9 @@ bool convertAddress(ParsedPeRef &pe, result); } - default: { return false; } + default: { + return false; + } } } @@ -262,7 +264,8 @@ int main(int 
argc, char *argv[]) { char *last_parsed_char = nullptr; errno = 0; - std::uint64_t address = std::strtoull(address_as_string, &last_parsed_char, 16); + std::uint64_t address = + std::strtoull(address_as_string, &last_parsed_char, 16); if (address == 0U && *last_parsed_char != 0) { std::cout << "Invalid address specified\n"; return 1; diff --git a/meson.build b/meson.build deleted file mode 100644 index 3a5f4a1..0000000 --- a/meson.build +++ /dev/null @@ -1,8 +0,0 @@ -project('pe-parse', - 'cpp', - default_options : [ 'cpp_std=c++11' ], - license : [ 'MIT' ], - ) - -subdir('parser-library') -subdir('dump-prog') diff --git a/parser-library/meson.build b/parser-library/meson.build deleted file mode 100644 index 451bc9b..0000000 --- a/parser-library/meson.build +++ /dev/null @@ -1,11 +0,0 @@ -incdirs = include_directories('.') -parser_source = [ - 'buffer.cpp', - 'parse.cpp' - ] - -pe_parser_library = library('pe-parser-library', - sources : parser_source, - install : true - ) -install_headers('parse.h', subdir : 'pe-parse') diff --git a/pe-parser-library/CMakeLists.txt b/pe-parser-library/CMakeLists.txt index 1a692a4..1962f79 100644 --- a/pe-parser-library/CMakeLists.txt +++ b/pe-parser-library/CMakeLists.txt @@ -1,24 +1,63 @@ -cmake_minimum_required(VERSION 3.7) -project(pe-parser-library) +cmake_minimum_required(VERSION 3.12 FATAL_ERROR) +project(pe-parse) + +message(STATUS "VERSION file: ${PROJECT_SOURCE_DIR}/../VERSION") # List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...) list(APPEND PEPARSERLIB_SOURCEFILES - include/parser-library/parse.h - include/parser-library/nt-headers.h - include/parser-library/to_string.h + include/pe-parse/parse.h + include/pe-parse/nt-headers.h + include/pe-parse/to_string.h src/buffer.cpp src/parse.cpp ) +# NOTE(ww): On Windows we use the Win32 API's built-in UTF16 conversion +# routines; on other platforms we use codecvt. 
codecvt is nominally deprecated +# in C++17 and onwards, but will probably be available for quite some time. +# Previous versions of pe-parse used ICU when available, but this caused +# DLL hell on Windows and wasn't worth the additional dependency. +if(MSVC) + list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_winapi.cpp) +else() + list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp) +endif() + add_library(${PROJECT_NAME} ${PEPARSERLIB_SOURCEFILES}) -target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + +if(PEPARSE_LIBRARY_WARNINGS) + target_compile_definitions(${PROJECT_NAME} PRIVATE PEPARSE_LIBRARY_WARNINGS=1) +endif () + +target_include_directories( + ${PROJECT_NAME} + PUBLIC + $ + $ +) target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS}) -install(TARGETS ${PROJECT_NAME} - RUNTIME DESTINATION "bin" - LIBRARY DESTINATION "lib" - ARCHIVE DESTINATION "lib" +install( + TARGETS ${PROJECT_NAME} + EXPORT pe-parse-config + RUNTIME + DESTINATION "bin" + LIBRARY + DESTINATION "lib" + ARCHIVE + DESTINATION "lib" ) -install(FILES "cmake/peparse-config.cmake" DESTINATION "lib/cmake/peparse") -install(DIRECTORY "include/parser-library" DESTINATION "include") +export( + TARGETS ${PROJECT_NAME} + NAMESPACE pe-parse:: + FILE "${CMAKE_CURRENT_BINARY_DIR}/pe-parse-config.cmake" +) +install( + EXPORT + pe-parse-config + DESTINATION "lib/cmake/pe-parse" + NAMESPACE pe-parse:: + EXPORT_LINK_INTERFACE_LIBRARIES +) +install(DIRECTORY "include/pe-parse" DESTINATION "include") diff --git a/pe-parser-library/cmake/pe-parse-config.cmake b/pe-parser-library/cmake/pe-parse-config.cmake new file mode 100644 index 0000000..a3c8b42 --- /dev/null +++ b/pe-parser-library/cmake/pe-parse-config.cmake @@ -0,0 +1,5 @@ +find_path(PEPARSE_INCLUDE_DIR "pe-parse/parse.h") +find_library(PEPARSE_LIBRARIES NAMES "libpe-parse" "pe-parse") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(pe-parse DEFAULT_MSG PEPARSE_INCLUDE_DIR 
PEPARSE_LIBRARIES) diff --git a/pe-parser-library/cmake/peparse-config.cmake b/pe-parser-library/cmake/peparse-config.cmake deleted file mode 100644 index 940ecb7..0000000 --- a/pe-parser-library/cmake/peparse-config.cmake +++ /dev/null @@ -1,5 +0,0 @@ -find_path(PEPARSE_INCLUDE_DIR $) -find_library(PEPARSE_LIBRARIES NAMES "libpe-parser-library" "pe-parser-library") - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(peparse DEFAULT_MSG PEPARSE_INCLUDE_DIR PEPARSE_LIBRARIES) diff --git a/pe-parser-library/include/parser-library/nt-headers.h b/pe-parser-library/include/pe-parse/nt-headers.h similarity index 75% rename from pe-parser-library/include/parser-library/nt-headers.h rename to pe-parser-library/include/pe-parse/nt-headers.h index ee94091..1dc071e 100644 --- a/pe-parser-library/include/parser-library/nt-headers.h +++ b/pe-parser-library/include/pe-parse/nt-headers.h @@ -26,16 +26,16 @@ THE SOFTWARE. #include #include - -#define _offset(t, f) \ - static_cast( \ - reinterpret_cast(&static_cast(nullptr)->f)) +#include // need to pack these structure definitions // some constant definitions // clang-format off namespace peparse { +constexpr std::uint32_t RICH_MAGIC_END = 0x68636952; +constexpr std::uint32_t RICH_MAGIC_START = 0x536e6144; +constexpr std::uint32_t RICH_OFFSET = 0x80; constexpr std::uint16_t MZ_MAGIC = 0x5A4D; constexpr std::uint32_t NT_MAGIC = 0x00004550; constexpr std::uint16_t NUM_DIR_ENTRIES = 16; @@ -43,22 +43,8 @@ constexpr std::uint16_t NT_OPTIONAL_32_MAGIC = 0x10B; constexpr std::uint16_t NT_OPTIONAL_64_MAGIC = 0x20B; constexpr std::uint16_t NT_SHORT_NAME_LEN = 8; constexpr std::uint16_t SYMTAB_RECORD_LEN = 18; -constexpr std::uint16_t DIR_EXPORT = 0; -constexpr std::uint16_t DIR_IMPORT = 1; -constexpr std::uint16_t DIR_RESOURCE = 2; -constexpr std::uint16_t DIR_EXCEPTION = 3; -constexpr std::uint16_t DIR_SECURITY = 4; -constexpr std::uint16_t DIR_BASERELOC = 5; -constexpr std::uint16_t DIR_DEBUG = 6; -constexpr 
std::uint16_t DIR_ARCHITECTURE = 7; -constexpr std::uint16_t DIR_GLOBALPTR = 8; -constexpr std::uint16_t DIR_TLS = 9; -constexpr std::uint16_t DIR_LOAD_CONFIG = 10; -constexpr std::uint16_t DIR_BOUND_IMPORT = 11; -constexpr std::uint16_t DIR_IAT = 12; -constexpr std::uint16_t DIR_DELAY_IMPORT = 13; -constexpr std::uint16_t DIR_COM_DESCRIPTOR = 14; +#ifndef _PEPARSE_WINDOWS_CONFLICTS // Machine Types constexpr std::uint16_t IMAGE_FILE_MACHINE_UNKNOWN = 0x0; constexpr std::uint16_t IMAGE_FILE_MACHINE_ALPHA = 0x1d3; // Alpha_AXP @@ -220,6 +206,20 @@ constexpr std::uint8_t IMAGE_SYM_CLASS_FILE = 103; constexpr std::uint8_t IMAGE_SYM_CLASS_SECTION = 104; constexpr std::uint8_t IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105; constexpr std::uint8_t IMAGE_SYM_CLASS_CLR_TOKEN = 107; + +// Optional header DLL characteristics +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE = 0x0040; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY = 0x0080; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NX_COMPAT = 0x0100; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = 0x0200; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_SEH = 0x0400; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_BIND = 0x0800; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_APPCONTAINER = 0x1000; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = 0x2000; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_GUARD_CF = 0x4000; +constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000; +#endif // clang-format on struct dos_header { @@ -259,6 +259,25 @@ struct data_directory { std::uint32_t Size; }; +enum data_directory_kind { + DIR_EXPORT = 0, + DIR_IMPORT = 1, + DIR_RESOURCE = 2, + DIR_EXCEPTION = 3, + DIR_SECURITY = 4, + DIR_BASERELOC = 5, + DIR_DEBUG = 6, + DIR_ARCHITECTURE = 7, + DIR_GLOBALPTR = 8, + DIR_TLS = 9, + DIR_LOAD_CONFIG = 10, + DIR_BOUND_IMPORT = 
11, + DIR_IAT = 12, + DIR_DELAY_IMPORT = 13, + DIR_COM_DESCRIPTOR = 14, + DIR_RESERVED = 15, +}; + struct optional_header_32 { std::uint16_t Magic; std::uint8_t MajorLinkerVersion; @@ -338,6 +357,22 @@ struct nt_header_32 { std::uint16_t OptionalMagic; }; +struct rich_entry { + std::uint16_t ProductId; + std::uint16_t BuildNumber; + std::uint32_t Count; +}; + +struct rich_header { + std::uint32_t StartSignature; + std::vector Entries; + std::uint32_t EndSignature; + std::uint32_t DecryptionKey; + std::uint32_t Checksum; + bool isPresent; + bool isValid; +}; + /* * This structure is only used to know how far to move the offset * when parsing resources. The data is stored in a resource_dir_entry @@ -418,19 +453,116 @@ struct export_dir_table { }; enum reloc_type { - ABSOLUTE = 0, - HIGH = 1, - LOW = 2, - HIGHLOW = 3, - HIGHADJ = 4, - MIPS_JMPADDR = 5, - MIPS_JMPADDR16 = 9, - IA64_IMM64 = 9, - DIR64 = 10 + RELOC_ABSOLUTE = 0, + RELOC_HIGH = 1, + RELOC_LOW = 2, + RELOC_HIGHLOW = 3, + RELOC_HIGHADJ = 4, + RELOC_MIPS_JMPADDR = 5, + RELOC_MIPS_JMPADDR16 = 9, + RELOC_IA64_IMM64 = 9, + RELOC_DIR64 = 10 }; struct reloc_block { std::uint32_t PageRVA; std::uint32_t BlockSize; }; + +struct image_load_config_code_integrity { + std::uint16_t Flags; + std::uint16_t Catalog; + std::uint32_t CatalogOffset; + std::uint32_t Reserved; +}; + +struct image_load_config_32 { + std::uint32_t Size; + std::uint32_t TimeDateStamp; + std::uint16_t MajorVersion; + std::uint16_t MinorVersion; + std::uint32_t GlobalFlagsClear; + std::uint32_t GlobalFlagsSet; + std::uint32_t CriticalSectionDefaultTimeout; + std::uint32_t DeCommitFreeBlockThreshold; + std::uint32_t DeCommitTotalFreeThreshold; + std::uint32_t LockPrefixTable; + std::uint32_t MaximumAllocationSize; + std::uint32_t VirtualMemoryThreshold; + std::uint32_t ProcessHeapFlags; + std::uint32_t ProcessAffinityMask; + std::uint16_t CSDVersion; + std::uint16_t DependentLoadFlags; + std::uint32_t EditList; + std::uint32_t SecurityCookie; + 
std::uint32_t SEHandlerTable; + std::uint32_t SEHandlerCount; + std::uint32_t GuardCFCheckFunctionPointer; + std::uint32_t GuardCFDispatchFunctionPointer; + std::uint32_t GuardCFFunctionTable; + std::uint32_t GuardCFFunctionCount; + std::uint32_t GuardFlags; + image_load_config_code_integrity CodeIntegrity; + std::uint32_t GuardAddressTakenIatEntryTable; + std::uint32_t GuardAddressTakenIatEntryCount; + std::uint32_t GuardLongJumpTargetTable; + std::uint32_t GuardLongJumpTargetCount; + std::uint32_t DynamicValueRelocTable; + std::uint32_t CHPEMetadataPointer; + std::uint32_t GuardRFFailureRoutine; + std::uint32_t GuardRFFailureRoutineFunctionPointer; + std::uint32_t DynamicValueRelocTableOffset; + std::uint16_t DynamicValueRelocTableSection; + std::uint16_t Reserved2; + std::uint32_t GuardRFVerifyStackPointerFunctionPointer; + std::uint32_t HotPatchTableOffset; + std::uint32_t Reserved3; + std::uint32_t EnclaveConfigurationPointer; + std::uint32_t VolatileMetadataPointer; +}; + +struct image_load_config_64 { + std::uint32_t Size; + std::uint32_t TimeDateStamp; + std::uint16_t MajorVersion; + std::uint16_t MinorVersion; + std::uint32_t GlobalFlagsClear; + std::uint32_t GlobalFlagsSet; + std::uint32_t CriticalSectionDefaultTimeout; + std::uint64_t DeCommitFreeBlockThreshold; + std::uint64_t DeCommitTotalFreeThreshold; + std::uint64_t LockPrefixTable; + std::uint64_t MaximumAllocationSize; + std::uint64_t VirtualMemoryThreshold; + std::uint64_t ProcessAffinityMask; + std::uint32_t ProcessHeapFlags; + std::uint16_t CSDVersion; + std::uint16_t DependentLoadFlags; + std::uint64_t EditList; + std::uint64_t SecurityCookie; + std::uint64_t SEHandlerTable; + std::uint64_t SEHandlerCount; + std::uint64_t GuardCFCheckFunctionPointer; + std::uint64_t GuardCFDispatchFunctionPointer; + std::uint64_t GuardCFFunctionTable; + std::uint64_t GuardCFFunctionCount; + std::uint32_t GuardFlags; + image_load_config_code_integrity CodeIntegrity; + std::uint64_t 
GuardAddressTakenIatEntryTable; + std::uint64_t GuardAddressTakenIatEntryCount; + std::uint64_t GuardLongJumpTargetTable; + std::uint64_t GuardLongJumpTargetCount; + std::uint64_t DynamicValueRelocTable; + std::uint64_t CHPEMetadataPointer; + std::uint64_t GuardRFFailureRoutine; + std::uint64_t GuardRFFailureRoutineFunctionPointer; + std::uint32_t DynamicValueRelocTableOffset; + std::uint16_t DynamicValueRelocTableSection; + std::uint16_t Reserved2; + std::uint64_t GuardRFVerifyStackPointerFunctionPointer; + std::uint32_t HotPatchTableOffset; + std::uint32_t Reserved3; + std::uint64_t EnclaveConfigurationPointer; + std::uint64_t VolatileMetadataPointer; +}; } // namespace peparse diff --git a/pe-parser-library/include/parser-library/parse.h b/pe-parser-library/include/pe-parse/parse.h similarity index 71% rename from pe-parser-library/include/parser-library/parse.h rename to pe-parser-library/include/pe-parse/parse.h index ed6d4c6..879f377 100644 --- a/pe-parser-library/include/parser-library/parse.h +++ b/pe-parser-library/include/pe-parse/parse.h @@ -25,13 +25,14 @@ THE SOFTWARE. #pragma once #include +#include #include #include "nt-headers.h" #include "to_string.h" #ifdef _MSC_VER -#define __typeof__(x) std::remove_reference < decltype(x) > ::type +#define __typeof__(x) std::remove_reference::type #endif #define PE_ERR(x) \ @@ -39,28 +40,28 @@ THE SOFTWARE. 
err_loc.assign(__func__); \ err_loc += ":" + to_string(__LINE__, std::dec); -#define READ_WORD(b, o, inst, member) \ - if (!readWord(b, o + _offset(__typeof__(inst), member), inst.member)) { \ - PE_ERR(PEERR_READ); \ - return false; \ - } - -#define READ_DWORD(b, o, inst, member) \ - if (!readDword(b, o + _offset(__typeof__(inst), member), inst.member)) { \ +#define READ_WORD(b, o, inst, member) \ + if (!readWord(b, o + offsetof(__typeof__(inst), member), inst.member)) { \ PE_ERR(PEERR_READ); \ return false; \ } -#define READ_QWORD(b, o, inst, member) \ - if (!readQword(b, o + _offset(__typeof__(inst), member), inst.member)) { \ - PE_ERR(PEERR_READ); \ - return false; \ +#define READ_DWORD(b, o, inst, member) \ + if (!readDword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \ + PE_ERR(PEERR_READ); \ + return false; \ } -#define READ_BYTE(b, o, inst, member) \ - if (!readByte(b, o + _offset(__typeof__(inst), member), inst.member)) { \ - PE_ERR(PEERR_READ); \ - return false; \ +#define READ_QWORD(b, o, inst, member) \ + if (!readQword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \ + PE_ERR(PEERR_READ); \ + return false; \ + } + +#define READ_BYTE(b, o, inst, member) \ + if (!readByte(b, o + offsetof(__typeof__(inst), member), inst.member)) { \ + PE_ERR(PEERR_READ); \ + return false; \ } #define TEST_MACHINE_CHARACTERISTICS(h, m, ch) \ @@ -98,6 +99,7 @@ struct resource { bounded_buffer *buf; }; +#ifndef _PEPARSE_WINDOWS_CONFLICTS // http://msdn.microsoft.com/en-us/library/ms648009(v=vs.85).aspx enum resource_type { RT_CURSOR = 1, @@ -122,6 +124,7 @@ enum resource_type { RT_HTML = 23, RT_MANIFEST = 24 }; +#endif enum pe_err { PEERR_NONE = 0, @@ -136,14 +139,17 @@ enum pe_err { PEERR_MAGIC = 9, PEERR_BUFFER = 10, PEERR_ADDRESS = 11, + PEERR_SIZE = 12, }; bool readByte(bounded_buffer *b, std::uint32_t offset, std::uint8_t &out); bool readWord(bounded_buffer *b, std::uint32_t offset, std::uint16_t &out); bool readDword(bounded_buffer *b, 
std::uint32_t offset, std::uint32_t &out); bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out); +bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out); bounded_buffer *readFileToFileBuffer(const char *filePath); +bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz); bounded_buffer * splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to); void deleteBuffer(bounded_buffer *b); @@ -152,6 +158,8 @@ uint64_t bufLen(bounded_buffer *b); struct parsed_pe_internal; typedef struct _pe_header { + dos_header dos; + rich_header rich; nt_header_32 nt; } pe_header; @@ -161,6 +169,12 @@ typedef struct _parsed_pe { pe_header peHeader; } parsed_pe; +// Resolve a Rich header product id / build number pair to a known +// product name +typedef std::pair ProductKey; +const std::string &GetRichObjectType(std::uint16_t prodId); +const std::string &GetRichProductName(std::uint16_t buildNum); + // get parser error status as integer std::uint32_t GetPEErr(); @@ -173,38 +187,54 @@ std::string GetPEErrLoc(); // get a PE parse context from a file parsed_pe *ParsePEFromFile(const char *filePath); +parsed_pe *ParsePEFromPointer(std::uint8_t *buffer, std::uint32_t sz); +parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer); + // destruct a PE context void DestructParsedPE(parsed_pe *p); +// iterate over Rich header entries +typedef int (*iterRich)(void *, const rich_entry &); +void IterRich(parsed_pe *pe, iterRich cb, void *cbd); + // iterate over the resources -typedef int (*iterRsrc)(void *, resource); +typedef int (*iterRsrc)(void *, const resource &); void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd); // iterate over the imports by RVA and string -typedef int (*iterVAStr)(void *, VA, const std::string &, const std::string &); +typedef int (*iterVAStr)(void *, + const VA &, + const std::string &, + const std::string &); void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd); // iterate over relocations in the 
PE file -typedef int (*iterReloc)(void *, VA, reloc_type); +typedef int (*iterReloc)(void *, const VA &, const reloc_type &); void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd); // Iterate over symbols (symbol table) in the PE file typedef int (*iterSymbol)(void *, - std::string &, - std::uint32_t &, - std::int16_t &, - std::uint16_t &, - std::uint8_t &, - std::uint8_t &); + const std::string &, + const std::uint32_t &, + const std::int16_t &, + const std::uint16_t &, + const std::uint8_t &, + const std::uint8_t &); void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd); // iterate over the exports -typedef int (*iterExp)(void *, VA, std::string &, std::string &); +typedef int (*iterExp)(void *, + const VA &, + const std::string &, + const std::string &); void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd); // iterate over sections -typedef int (*iterSec)( - void *, VA secBase, std::string &, image_section_header, bounded_buffer *b); +typedef int (*iterSec)(void *, + const VA &, + const std::string &, + const image_section_header &, + const bounded_buffer *); void IterSec(parsed_pe *pe, iterSec cb, void *cbd); // get byte at VA in PE @@ -218,4 +248,9 @@ const char *GetMachineAsString(parsed_pe *pe); // get subsystem as human readable string const char *GetSubsystemAsString(parsed_pe *pe); + +// get a table or string by its data directory entry +bool GetDataDirectoryEntry(parsed_pe *pe, + data_directory_kind dirnum, + std::vector &raw_entry); } // namespace peparse diff --git a/pe-parser-library/include/parser-library/to_string.h b/pe-parser-library/include/pe-parse/to_string.h similarity index 56% rename from pe-parser-library/include/parser-library/to_string.h rename to pe-parser-library/include/pe-parse/to_string.h index e4a6b75..92933b0 100644 --- a/pe-parser-library/include/parser-library/to_string.h +++ b/pe-parser-library/include/pe-parse/to_string.h @@ -1,6 +1,13 @@ #pragma once #include +#include + +#if defined(_MSC_VER) +typedef std::basic_string 
UCharString; +#else +typedef std::u16string UCharString; +#endif namespace peparse { template @@ -9,4 +16,6 @@ static std::string to_string(T t, std::ios_base &(*f)(std::ios_base &) ) { oss << f << t; return oss.str(); } + +std::string from_utf16(const UCharString &u); } // namespace peparse diff --git a/pe-parser-library/src/buffer.cpp b/pe-parser-library/src/buffer.cpp index 60be897..5b27ebf 100644 --- a/pe-parser-library/src/buffer.cpp +++ b/pe-parser-library/src/buffer.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include // keep this header above "windows.h" because it contains many types -#include +#include #ifdef _WIN32 @@ -164,6 +164,31 @@ bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out) { return true; } +bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out) { + if (b == nullptr) { + PE_ERR(PEERR_BUFFER); + return false; + } + + if (offset + 1 >= b->bufLen) { + PE_ERR(PEERR_ADDRESS); + return false; + } + + char16_t *tmp = nullptr; + if (b->swapBytes) { + std::uint8_t tmpBuf[2]; + tmpBuf[0] = *(b->buf + offset + 1); + tmpBuf[1] = *(b->buf + offset); + tmp = reinterpret_cast(tmpBuf); + } else { + tmp = reinterpret_cast(b->buf + offset); + } + out = *tmp; + + return true; +} + bounded_buffer *readFileToFileBuffer(const char *filePath) { #ifdef _WIN32 HANDLE h = CreateFileA(filePath, @@ -273,6 +298,28 @@ bounded_buffer *readFileToFileBuffer(const char *filePath) { return p; } +bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz) { + if (data == nullptr) { + PE_ERR(PEERR_MEM); + return nullptr; + } + + bounded_buffer *p = new (std::nothrow) bounded_buffer(); + + if (p == nullptr) { + PE_ERR(PEERR_MEM); + return nullptr; + } + + p->copy = true; + p->detail = nullptr; + p->buf = data; + p->bufLen = sz; + p->swapBytes = false; + + return p; +} + // split buffer inclusively from from to to by offset bounded_buffer * splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to) { diff --git 
a/pe-parser-library/src/parse.cpp b/pe-parser-library/src/parse.cpp index 03db25b..42fbcb8 100644 --- a/pe-parser-library/src/parse.cpp +++ b/pe-parser-library/src/parse.cpp @@ -23,14 +23,16 @@ THE SOFTWARE. */ #include +#include +#include #include #include -#include #include +#include -#include -#include -#include +#include +#include +#include namespace peparse { @@ -122,21 +124,372 @@ struct parsed_pe_internal { std::vector symbols; }; +// String representation of Rich header object types +static const std::string kProdId_C = "[ C ]"; +static const std::string kProdId_CPP = "[C++]"; +static const std::string kProdId_RES = "[RES]"; +static const std::string kProdId_IMP = "[IMP]"; +static const std::string kProdId_EXP = "[EXP]"; +static const std::string kProdId_ASM = "[ASM]"; +static const std::string kProdId_LNK = "[LNK]"; +static const std::string kProdId_UNK = "[ ? ]"; + +// Mapping of Rich header Product ID to object type string +// Source: https://github.com/dishather/richprint/blob/master/comp_id.txt +static const std::map ProductIdMap = { + {std::make_pair(static_cast(0x0000), kProdId_UNK)}, + {std::make_pair(static_cast(0x0002), kProdId_IMP)}, + {std::make_pair(static_cast(0x0004), kProdId_LNK)}, + {std::make_pair(static_cast(0x0006), kProdId_RES)}, + {std::make_pair(static_cast(0x000A), kProdId_C)}, + {std::make_pair(static_cast(0x000B), kProdId_CPP)}, + {std::make_pair(static_cast(0x000F), kProdId_ASM)}, + {std::make_pair(static_cast(0x0015), kProdId_C)}, + {std::make_pair(static_cast(0x0016), kProdId_CPP)}, + {std::make_pair(static_cast(0x0019), kProdId_IMP)}, + {std::make_pair(static_cast(0x001C), kProdId_C)}, + {std::make_pair(static_cast(0x001D), kProdId_CPP)}, + {std::make_pair(static_cast(0x003D), kProdId_LNK)}, + {std::make_pair(static_cast(0x003F), kProdId_EXP)}, + {std::make_pair(static_cast(0x0040), kProdId_ASM)}, + {std::make_pair(static_cast(0x0045), kProdId_RES)}, + {std::make_pair(static_cast(0x005A), kProdId_LNK)}, + 
{std::make_pair(static_cast(0x005C), kProdId_EXP)}, + {std::make_pair(static_cast(0x005D), kProdId_IMP)}, + {std::make_pair(static_cast(0x005E), kProdId_RES)}, + {std::make_pair(static_cast(0x005F), kProdId_C)}, + {std::make_pair(static_cast(0x0060), kProdId_CPP)}, + {std::make_pair(static_cast(0x006D), kProdId_C)}, + {std::make_pair(static_cast(0x006E), kProdId_CPP)}, + {std::make_pair(static_cast(0x0078), kProdId_LNK)}, + {std::make_pair(static_cast(0x007A), kProdId_EXP)}, + {std::make_pair(static_cast(0x007B), kProdId_IMP)}, + {std::make_pair(static_cast(0x007C), kProdId_RES)}, + {std::make_pair(static_cast(0x007D), kProdId_ASM)}, + {std::make_pair(static_cast(0x0083), kProdId_C)}, + {std::make_pair(static_cast(0x0084), kProdId_CPP)}, + {std::make_pair(static_cast(0x0091), kProdId_LNK)}, + {std::make_pair(static_cast(0x0092), kProdId_EXP)}, + {std::make_pair(static_cast(0x0093), kProdId_IMP)}, + {std::make_pair(static_cast(0x0094), kProdId_RES)}, + {std::make_pair(static_cast(0x0095), kProdId_ASM)}, + {std::make_pair(static_cast(0x009A), kProdId_RES)}, + {std::make_pair(static_cast(0x009B), kProdId_EXP)}, + {std::make_pair(static_cast(0x009C), kProdId_IMP)}, + {std::make_pair(static_cast(0x009D), kProdId_LNK)}, + {std::make_pair(static_cast(0x009E), kProdId_ASM)}, + {std::make_pair(static_cast(0x00AA), kProdId_C)}, + {std::make_pair(static_cast(0x00AB), kProdId_CPP)}, + {std::make_pair(static_cast(0x00C9), kProdId_RES)}, + {std::make_pair(static_cast(0x00CA), kProdId_EXP)}, + {std::make_pair(static_cast(0x00CB), kProdId_IMP)}, + {std::make_pair(static_cast(0x00CC), kProdId_LNK)}, + {std::make_pair(static_cast(0x00CD), kProdId_ASM)}, + {std::make_pair(static_cast(0x00CE), kProdId_C)}, + {std::make_pair(static_cast(0x00CF), kProdId_CPP)}, + {std::make_pair(static_cast(0x00DB), kProdId_RES)}, + {std::make_pair(static_cast(0x00DC), kProdId_EXP)}, + {std::make_pair(static_cast(0x00DD), kProdId_IMP)}, + {std::make_pair(static_cast(0x00DE), kProdId_LNK)}, + 
{std::make_pair(static_cast(0x00DF), kProdId_ASM)}, + {std::make_pair(static_cast(0x00E0), kProdId_C)}, + {std::make_pair(static_cast(0x00E1), kProdId_CPP)}, + {std::make_pair(static_cast(0x00FF), kProdId_RES)}, + {std::make_pair(static_cast(0x0100), kProdId_EXP)}, + {std::make_pair(static_cast(0x0101), kProdId_IMP)}, + {std::make_pair(static_cast(0x0102), kProdId_LNK)}, + {std::make_pair(static_cast(0x0103), kProdId_ASM)}, + {std::make_pair(static_cast(0x0104), kProdId_C)}, + {std::make_pair(static_cast(0x0105), kProdId_CPP)}}; + +// Mapping of Rich header build number to version strings +static const std::map ProductMap = { + // Source: https://github.com/dishather/richprint/blob/master/comp_id.txt + {std::make_pair(static_cast(0x0000), "Imported Functions")}, + {std::make_pair(static_cast(0x0684), + "VS97 v5.0 SP3 cvtres 5.00.1668")}, + {std::make_pair(static_cast(0x06B8), + "VS98 v6.0 cvtres build 1720")}, + {std::make_pair(static_cast(0x06C8), + "VS98 v6.0 SP6 cvtres build 1736")}, + {std::make_pair(static_cast(0x1C87), + "VS97 v5.0 SP3 link 5.10.7303")}, + {std::make_pair(static_cast(0x5E92), + "VS2015 v14.0 UPD3 build 24210")}, + {std::make_pair(static_cast(0x5E95), + "VS2015 UPD3 build 24213")}, + + // http://bytepointer.com/articles/the_microsoft_rich_header.htm + {std::make_pair(static_cast(0x0BEC), + "VS2003 v7.1 Free Toolkit .NET build 3052")}, + {std::make_pair(static_cast(0x0C05), + "VS2003 v7.1 .NET build 3077")}, + {std::make_pair(static_cast(0x0FC3), + "VS2003 v7.1 | Windows Server 2003 SP1 DDK build 4035")}, + {std::make_pair(static_cast(0x1C83), "MASM 6.13.7299")}, + {std::make_pair(static_cast(0x178E), + "VS2003 v7.1 SP1 .NET build 6030")}, + {std::make_pair(static_cast(0x1FE8), + "VS98 v6.0 RTM/SP1/SP2 build 8168")}, + {std::make_pair(static_cast(0x1FE9), + "VB 6.0/SP1/SP2 build 8169")}, + {std::make_pair(static_cast(0x20FC), "MASM 6.14.8444")}, + {std::make_pair(static_cast(0x20FF), + "VC++ 6.0 SP3 build 8447")}, + 
{std::make_pair(static_cast(0x212F), + "VB 6.0 SP3 build 8495")}, + {std::make_pair(static_cast(0x225F), + "VS 6.0 SP4 build 8799")}, + {std::make_pair(static_cast(0x2263), "MASM 6.15.8803")}, + {std::make_pair(static_cast(0x22AD), + "VB 6.0 SP4 build 8877")}, + {std::make_pair(static_cast(0x2304), + "VB 6.0 SP5 build 8964")}, + {std::make_pair(static_cast(0x2306), + "VS 6.0 SP5 build 8966")}, + // {std::make_pair(static_cast(0x2346), "MASM 6.15.9030 + // (VS.NET 7.0 BETA 1)")}, + {std::make_pair(static_cast(0x2346), + "VS 7.0 2000 Beta 1 build 9030")}, + {std::make_pair(static_cast(0x2354), + "VS 6.0 SP5 Processor Pack build 9044")}, + {std::make_pair(static_cast(0x2426), + "VS2001 v7.0 Beta 2 build 9254")}, + {std::make_pair(static_cast(0x24FA), + "VS2002 v7.0 .NET build 9466")}, + {std::make_pair(static_cast(0x2636), + "VB 6.0 SP6 / VC++ build 9782")}, + {std::make_pair(static_cast(0x26E3), + "VS2002 v7.0 SP1 build 9955")}, + {std::make_pair(static_cast(0x520D), + "VS2013 v12.[0,1] build 21005")}, + {std::make_pair(static_cast(0x521E), + "VS2008 v9.0 build 21022")}, + {std::make_pair(static_cast(0x56C7), + "VS2015 v14.0 build 22215")}, + {std::make_pair(static_cast(0x59F2), + "VS2015 v14.0 build 23026")}, + {std::make_pair(static_cast(0x5BD2), + "VS2015 v14.0 UPD1 build 23506")}, + {std::make_pair(static_cast(0x5D10), + "VS2015 v14.0 UPD2 build 23824")}, + {std::make_pair(static_cast(0x5E97), + "VS2015 v14.0 UPD3.1 build 24215")}, + {std::make_pair(static_cast(0x7725), + "VS2013 v12.0 UPD2 build 30501")}, + {std::make_pair(static_cast(0x766F), + "VS2010 v10.0 build 30319")}, + {std::make_pair(static_cast(0x7809), + "VS2008 v9.0 SP1 build 30729")}, + {std::make_pair(static_cast(0x797D), + "VS2013 v12.0 UPD4 build 31101")}, + {std::make_pair(static_cast(0x9D1B), + "VS2010 v10.0 SP1 build 40219")}, + {std::make_pair(static_cast(0x9EB5), + "VS2013 v12.0 UPD5 build 40629")}, + {std::make_pair(static_cast(0xC497), + "VS2005 v8.0 (Beta) build 50327")}, + 
{std::make_pair(static_cast(0xC627), + "VS2005 v8.0 | VS2012 v11.0 build 50727")}, + {std::make_pair(static_cast(0xC751), + "VS2012 v11.0 Nov CTP build 51025")}, + {std::make_pair(static_cast(0xC7A2), + "VS2012 v11.0 UPD1 build 51106")}, + {std::make_pair(static_cast(0xEB9B), + "VS2012 v11.0 UPD2 build 60315")}, + {std::make_pair(static_cast(0xECC2), + "VS2012 v11.0 UPD3 build 60610")}, + {std::make_pair(static_cast(0xEE66), + "VS2012 v11.0 UPD4 build 61030")}, + {std::make_pair(static_cast(0x5E9A), + "VS2015 v14.0 build 24218")}, + {std::make_pair(static_cast(0x61BB), + "VS2017 v14.1 build 25019")}, + + // https://dev.to/yumetodo/list-of-mscver-and-mscfullver-8nd + {std::make_pair(static_cast(0x2264), + "VS 6 [SP5,SP6] build 8804")}, + {std::make_pair(static_cast(0x23D8), "Windows XP SP1 DDK")}, + {std::make_pair(static_cast(0x0883), + "Windows Server 2003 DDK")}, + {std::make_pair(static_cast(0x08F4), + "VS2003 v7.1 .NET Beta build 2292")}, + {std::make_pair(static_cast(0x9D76), + "Windows Server 2003 SP1 DDK (for AMD64)")}, + {std::make_pair(static_cast(0x9E9F), + "VS2005 v8.0 Beta 1 build 40607")}, + {std::make_pair(static_cast(0xC427), + "VS2005 v8.0 Beta 2 build 50215")}, + {std::make_pair(static_cast(0xC490), + "VS2005 v8.0 build 50320")}, + {std::make_pair(static_cast(0x50E2), + "VS2008 v9.0 Beta 2 build 20706")}, + {std::make_pair(static_cast(0x501A), + "VS2010 v10.0 Beta 1 build 20506")}, + {std::make_pair(static_cast(0x520B), + "VS2010 v10.0 Beta 2 build 21003")}, + {std::make_pair(static_cast(0x5089), + "VS2013 v12.0 Preview build 20617")}, + {std::make_pair(static_cast(0x515B), + "VS2013 v12.0 RC build 20827")}, + {std::make_pair(static_cast(0x527A), + "VS2013 v12.0 Nov CTP build 21114")}, + {std::make_pair(static_cast(0x63A3), + "VS2017 v15.3.3 build 25507")}, + {std::make_pair(static_cast(0x63C6), + "VS2017 v15.4.4 build 25542")}, + {std::make_pair(static_cast(0x63CB), + "VS2017 v15.4.5 build 25547")}, + {std::make_pair(static_cast(0x7674), + "VS2013 
v12.0 UPD2 RC build 30324")}, + + // https://walbourn.github.io/visual-studio-2015-update-2/ + {std::make_pair(static_cast(0x5D6E), + "VS2015 v14.0 UPD2 build 23918")}, + + // https://walbourn.github.io/visual-studio-2017/ + {std::make_pair(static_cast(0x61B9), + "VS2017 v15.[0,1] build 25017")}, + {std::make_pair(static_cast(0x63A2), + "VS2017 v15.2 build 25019")}, + + // https://walbourn.github.io/vs-2017-15-5-update/ + {std::make_pair(static_cast(0x64E6), + "VS2017 v15 build 25830")}, + {std::make_pair(static_cast(0x64E7), + "VS2017 v15.5.2 build 25831")}, + {std::make_pair(static_cast(0x64EA), + "VS2017 v15.5.[3,4] build 25834")}, + {std::make_pair(static_cast(0x64EB), + "VS2017 v15.5.[5,6,7] build 25835")}, + + // https://walbourn.github.io/vs-2017-15-6-update/ + {std::make_pair(static_cast(0x6610), + "VS2017 v15.6.[0,1,2] build 26128")}, + {std::make_pair(static_cast(0x6611), + "VS2017 v15.6.[3,4] build 26129")}, + {std::make_pair(static_cast(0x6613), + "VS2017 v15.6.6 build 26131")}, + {std::make_pair(static_cast(0x6614), + "VS2017 v15.6.7 build 26132")}, + + // https://devblogs.microsoft.com/visualstudio/visual-studio-2017-update/ + {std::make_pair(static_cast(0x6723), + "VS2017 v15.1 build 26403")}, + + // https://walbourn.github.io/vs-2017-15-7-update/ + {std::make_pair(static_cast(0x673C), + "VS2017 v15.7.[0,1] build 26428")}, + {std::make_pair(static_cast(0x673D), + "VS2017 v15.7.2 build 26429")}, + {std::make_pair(static_cast(0x673E), + "VS2017 v15.7.3 build 26430")}, + {std::make_pair(static_cast(0x673F), + "VS2017 v15.7.4 build 26431")}, + {std::make_pair(static_cast(0x6741), + "VS2017 v15.7.5 build 26433")}, + + // https://walbourn.github.io/visual-studio-2019/ + {std::make_pair(static_cast(0x6B74), + "VS2019 v16.0.0 build 27508")}, + + // https://walbourn.github.io/vs-2017-15-8-update/ + {std::make_pair(static_cast(0x6866), + "VS2017 v15.8.0 build 26726")}, + {std::make_pair(static_cast(0x6869), + "VS2017 v15.8.4 build 26729")}, + 
{std::make_pair(static_cast(0x686A), + "VS2017 v15.8.9 build 26730")}, + {std::make_pair(static_cast(0x686C), + "VS2017 v15.8.5 build 26732")}, + + // https://walbourn.github.io/vs-2017-15-9-update/ + {std::make_pair(static_cast(0x698F), + "VS2017 v15.9.[0,1] build 27023")}, + {std::make_pair(static_cast(0x6990), + "VS2017 v15.9.2 build 27024")}, + {std::make_pair(static_cast(0x6991), + "VS2017 v15.9.4 build 27025")}, + {std::make_pair(static_cast(0x6992), + "VS2017 v15.9.5 build 27026")}, + {std::make_pair(static_cast(0x6993), + "VS2017 v15.9.7 build 27027")}, + {std::make_pair(static_cast(0x6996), + "VS2017 v15.9.11 build 27030")}, + {std::make_pair(static_cast(0x6997), + "VS2017 v15.9.12 build 27031")}, + {std::make_pair(static_cast(0x6998), + "VS2017 v15.9.14 build 27032")}, + {std::make_pair(static_cast(0x699A), + "VS2017 v15.9.16 build 27034")}, + + // https://walbourn.github.io/visual-studio-2019/ + {std::make_pair(static_cast(0x6B74), + "VS2019 v16.0.0 RTM build 27508")}, + + // https://walbourn.github.io/vs-2019-update-1/ + {std::make_pair(static_cast(0x6C36), + "VS2019 v16.1.2 UPD1 build 27702")}, + + // https://walbourn.github.io/vs-2019-update-2/ + {std::make_pair(static_cast(0x6D01), + "VS2019 v16.2.3 UPD2 build 27905")}, + + // https://walbourn.github.io/vs-2019-update-3/ + {std::make_pair(static_cast(0x6DC9), + "VS2019 v16.3.2 UPD3 build 28105")}, + + // https://walbourn.github.io/visual-studio-2013-update-3/ + {std::make_pair(static_cast(0x7803), + "VS2013 v12.0 UPD3 build 30723")}, + + // experimentation + {std::make_pair(static_cast(0x685B), + "VS2017 v15.8.? 
build 26715")}, +}; + +static const std::string kUnknownProduct = ""; + +// Returns a stringified Rich header object type given a product id +const std::string &GetRichObjectType(std::uint16_t prodId) { + + auto it = ProductIdMap.find(prodId); + if (it != ProductIdMap.end()) { + return it->second; + } else { + return kProdId_UNK; + } +} + +// Returns a stringified Rich header product name given a build number +const std::string &GetRichProductName(std::uint16_t buildNum) { + + auto it = ProductMap.find(buildNum); + if (it != ProductMap.end()) { + return it->second; + } else { + return kUnknownProduct; + } +} + std::uint32_t err = 0; std::string err_loc; -static const char *pe_err_str[] = {"None", - "Out of memory", - "Invalid header", - "Invalid section", - "Invalid resource", - "Unable to get section for VA", - "Unable to read data", - "Unable to open", - "Unable to stat", - "Bad magic", - "Invalid buffer", - "Invalid address",}; +static const char *pe_err_str[] = { + "None", + "Out of memory", + "Invalid header", + "Invalid section", + "Invalid resource", + "Unable to get section for VA", + "Unable to read data", + "Unable to open", + "Unable to stat", + "Bad magic", + "Invalid buffer", + "Invalid address", + "Invalid size", +}; std::uint32_t GetPEErr() { return err; @@ -211,8 +564,9 @@ const char *GetSymbolTableStorageClassName(std::uint8_t id) { } } -static bool -readCString(const bounded_buffer &buffer, std::uint32_t off, std::string &result) { +static bool readCString(const bounded_buffer &buffer, + std::uint32_t off, + std::string &result) { if (off < buffer.bufLen) { std::uint8_t *p = buffer.buf; std::uint32_t n = buffer.bufLen; @@ -243,32 +597,44 @@ bool getSecForVA(const std::vector
&secs, VA v, section &sec) { return false; } -void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) { - parsed_pe_internal *pint = pe->internal; - - for (resource r : pint->rsrcs) { +void IterRich(parsed_pe *pe, iterRich cb, void *cbd) { + for (rich_entry &r : pe->peHeader.rich.Entries) { if (cb(cbd, r) != 0) { break; } } - - return; } -bool parse_resource_id(bounded_buffer *data, std::uint32_t id, std::string &result) { - std::uint8_t c; - std::uint16_t len; +void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) { + parsed_pe_internal *pint = pe->internal; + for (const resource &r : pint->rsrcs) { + if (cb(cbd, r) != 0) { + break; + } + } +} + +bool parse_resource_id(bounded_buffer *data, + std::uint32_t id, + std::string &result) { + std::uint16_t len; if (!readWord(data, id, len)) { return false; } id += 2; - for (std::uint32_t i = 0; i < len * 2U; i++) { - if (!readByte(data, id + i, c)) { + + std::uint32_t rawSize = len * 2U; + UCharString rawString; + for (std::uint32_t i = 0; i < rawSize; i += 2) { + char16_t c; + if (!readChar16(data, id + i, c)) { return false; } - result.push_back(static_cast(c)); + rawString.push_back(c); } + + result = from_utf16(rawString); return true; } @@ -305,7 +671,7 @@ bool parse_resource_table(bounded_buffer *sectionData, rde = new resource_dir_entry; } - if (!readDword(sectionData, o + _offset(__typeof__(*rde), ID), rde->ID)) { + if (!readDword(sectionData, o + offsetof(__typeof__(*rde), ID), rde->ID)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { delete rde; @@ -313,7 +679,8 @@ bool parse_resource_table(bounded_buffer *sectionData, return false; } - if (!readDword(sectionData, o + _offset(__typeof__(*rde), RVA), rde->RVA)) { + if (!readDword( + sectionData, o + offsetof(__typeof__(*rde), RVA), rde->RVA)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { delete rde; @@ -357,9 +724,10 @@ bool parse_resource_table(bounded_buffer *sectionData, } } } else { - /* .rsrc can accomodate up to 2**31 levels, but Windows only uses 3 by 
convention. - * As such, any depth above 3 indicates potentially unchecked recusion. - * See: https://docs.microsoft.com/en-us/windows/desktop/debug/pe-format#the-rsrc-section + /* .rsrc can accomodate up to 2**31 levels, but Windows only uses 3 by + * convention. As such, any depth above 3 indicates potentially unchecked + * recusion. See: + * https://docs.microsoft.com/en-us/windows/desktop/debug/pe-format#the-rsrc-section */ PE_ERR(PEERR_RESC); @@ -393,7 +761,7 @@ bool parse_resource_table(bounded_buffer *sectionData, */ if (!readDword(sectionData, - rde->RVA + _offset(__typeof__(rdat), RVA), + rde->RVA + offsetof(__typeof__(rdat), RVA), rdat.RVA)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { @@ -403,7 +771,7 @@ bool parse_resource_table(bounded_buffer *sectionData, } if (!readDword(sectionData, - rde->RVA + _offset(__typeof__(rdat), size), + rde->RVA + offsetof(__typeof__(rdat), size), rdat.size)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { @@ -413,7 +781,7 @@ bool parse_resource_table(bounded_buffer *sectionData, } if (!readDword(sectionData, - rde->RVA + _offset(__typeof__(rdat), codepage), + rde->RVA + offsetof(__typeof__(rdat), codepage), rdat.codepage)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { @@ -423,7 +791,7 @@ bool parse_resource_table(bounded_buffer *sectionData, } if (!readDword(sectionData, - rde->RVA + _offset(__typeof__(rdat), reserved), + rde->RVA + offsetof(__typeof__(rdat), reserved), rdat.reserved)) { PE_ERR(PEERR_READ); if (dirent == nullptr) { @@ -544,7 +912,8 @@ bool getSections(bounded_buffer *b, // now we have the section header information, so fill in a section // object appropriately section thisSec; - for (std::uint32_t charIndex = 0; charIndex < NT_SHORT_NAME_LEN; charIndex++) { + for (std::uint32_t charIndex = 0; charIndex < NT_SHORT_NAME_LEN; + charIndex++) { std::uint8_t c = curSec.Name[charIndex]; if (c == 0) { break; @@ -568,9 +937,22 @@ bool getSections(bounded_buffer *b, std::uint32_t highOff = lowOff + 
curSec.SizeOfRawData; thisSec.sectionData = splitBuffer(fileBegin, lowOff, highOff); + // GH#109: we trusted [lowOff, highOff) to be a range that yields + // a valid bounded_buffer, despite these being user-controllable. + // splitBuffer correctly handles this, but we failed to check for + // the nullptr it returns as a sentinel. + if (thisSec.sectionData == nullptr) { + return false; + } + secs.push_back(thisSec); } + std::sort( + secs.begin(), secs.end(), [](const section &lhs, const section &rhs) { + return lhs.sec.PointerToRawData < rhs.sec.PointerToRawData; + }); + return true; } @@ -613,15 +995,15 @@ bool readOptionalHeader(bounded_buffer *b, optional_header_32 &header) { for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) { std::uint32_t c = (i * sizeof(data_directory)); - c += _offset(optional_header_32, DataDirectory[0]); + c += offsetof(optional_header_32, DataDirectory[0]); std::uint32_t o; - o = c + _offset(data_directory, VirtualAddress); + o = c + offsetof(data_directory, VirtualAddress); if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) { return false; } - o = c + _offset(data_directory, Size); + o = c + offsetof(data_directory, Size); if (!readDword(b, o, header.DataDirectory[i].Size)) { return false; } @@ -668,15 +1050,15 @@ bool readOptionalHeader64(bounded_buffer *b, optional_header_64 &header) { for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) { std::uint32_t c = (i * sizeof(data_directory)); - c += _offset(optional_header_64, DataDirectory[0]); + c += offsetof(optional_header_64, DataDirectory[0]); std::uint32_t o; - o = c + _offset(data_directory, VirtualAddress); + o = c + offsetof(data_directory, VirtualAddress); if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) { return false; } - o = c + _offset(data_directory, Size); + o = c + offsetof(data_directory, Size); if (!readDword(b, o, header.DataDirectory[i].Size)) { return false; } @@ -711,7 +1093,7 @@ bool readNtHeader(bounded_buffer *b, 
nt_header_32 &header) { header.Signature = pe_magic; bounded_buffer *fhb = - splitBuffer(b, _offset(nt_header_32, FileHeader), b->bufLen); + splitBuffer(b, offsetof(nt_header_32, FileHeader), b->bufLen); if (fhb == nullptr) { PE_ERR(PEERR_MEM); @@ -750,7 +1132,7 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) { * buffer regardless. */ bounded_buffer *ohb = - splitBuffer(b, _offset(nt_header_32, OptionalHeader), b->bufLen); + splitBuffer(b, offsetof(nt_header_32, OptionalHeader), b->bufLen); if (ohb == nullptr) { deleteBuffer(fhb); @@ -794,30 +1176,251 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) { return true; } +// zero extends its first argument to 32 bits and then performs a rotate left +// operation equal to the second arguments value of the first argument’s bits +static inline std::uint32_t rol(std::uint32_t val, std::uint32_t num) { + assert(num < 32); + // Disable MSVC warning for unary minus operator applied to unsigned type +#if defined(_MSC_VER) || defined(_MSC_FULL_VER) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + // https://blog.regehr.org/archives/1063 + return (val << num) | (val >> (-num & 31)); +#if defined(_MSC_VER) || defined(_MSC_FULL_VER) +#pragma warning(pop) +#endif +} + +std::uint32_t calculateRichChecksum(const bounded_buffer *b, pe_header &p) { + + // First, calculate the sum of the DOS header bytes each rotated left the + // number of times their position relative to the start of the DOS header e.g. + // second byte is rotated left 2x using rol operation + std::uint32_t checksum = 0; + + for (uint8_t i = 0; i < RICH_OFFSET; i++) { + + // skip over dos e_lfanew field at offset 0x3C + if (i >= 0x3C && i <= 0x3F) { + continue; + } + checksum += rol(b->buf[i], i & 0x1F); + } + + // Next, take summation of each Rich header entry by combining its ProductId + // and BuildNumber into a single 32 bit number and rotating by its count. 
+ for (rich_entry entry : p.rich.Entries) { + std::uint32_t num = + static_cast((entry.ProductId << 16) | entry.BuildNumber); + checksum += rol(num, entry.Count & 0x1F); + } + + checksum += RICH_OFFSET; + + return checksum; +} + +bool readRichHeader(bounded_buffer *rich_buf, + std::uint32_t key, + rich_header &rich_hdr) { + if (rich_buf == nullptr) { + return false; + } + + std::uint32_t encrypted_dword; + std::uint32_t decrypted_dword; + + // Confirm DanS signature exists first. + // The first decrypted DWORD value of the rich header + // at offset 0 should be 0x536e6144 aka the "DanS" signature + if (!readDword(rich_buf, 0, encrypted_dword)) { + PE_ERR(PEERR_READ); + return false; + } + + decrypted_dword = encrypted_dword ^ key; + + if (decrypted_dword == RICH_MAGIC_START) { + // DanS magic found + rich_hdr.isPresent = true; + rich_hdr.StartSignature = decrypted_dword; + } else { + // DanS magic not found + rich_hdr.isPresent = false; + return false; + } + + // Iterate over the remaining entries. + // Start from buffer offset 16 because after "DanS" there + // are three DWORDs of zero padding that can be skipped over. + // a DWORD is 4 bytes. Loop is incrementing 8 bytes, however + // we are reading two DWORDS at a time, which is the size + // of one rich header entry. 
+ for (std::uint32_t i = 16; i < rich_buf->bufLen - 8; i += 8) { + rich_entry entry; + // Read first DWORD of entry and decrypt it + if (!readDword(rich_buf, i, encrypted_dword)) { + PE_ERR(PEERR_READ); + return false; + } + decrypted_dword = encrypted_dword ^ key; + // The high WORD of the first DWORD is the Product ID + entry.ProductId = (decrypted_dword & 0xFFFF0000) >> 16; + // The low WORD of the first DWORD is the Build Number + entry.BuildNumber = (decrypted_dword & 0xFFFF); + + // The second DWORD represents the use count + if (!readDword(rich_buf, i + 4, encrypted_dword)) { + PE_ERR(PEERR_READ); + return false; + } + decrypted_dword = encrypted_dword ^ key; + // The full 32-bit DWORD is the count + entry.Count = decrypted_dword; + + // Preserve the individual entry + rich_hdr.Entries.push_back(entry); + } + + // Preserve the end signature aka "Rich" magic + if (!readDword(rich_buf, rich_buf->bufLen - 4, rich_hdr.EndSignature)) { + PE_ERR(PEERR_READ); + return false; + }; + if (rich_hdr.EndSignature != RICH_MAGIC_END) { + PE_ERR(PEERR_MAGIC); + return false; + } + + // Preserve the decryption key + rich_hdr.DecryptionKey = key; + + return true; +} + +bool readDosHeader(bounded_buffer *file, dos_header &dos_hdr) { + if (file == nullptr) { + return false; + } + + READ_WORD(file, 0, dos_hdr, e_magic); + READ_WORD(file, 0, dos_hdr, e_cblp); + READ_WORD(file, 0, dos_hdr, e_cp); + READ_WORD(file, 0, dos_hdr, e_crlc); + READ_WORD(file, 0, dos_hdr, e_cparhdr); + READ_WORD(file, 0, dos_hdr, e_minalloc); + READ_WORD(file, 0, dos_hdr, e_maxalloc); + READ_WORD(file, 0, dos_hdr, e_ss); + READ_WORD(file, 0, dos_hdr, e_sp); + READ_WORD(file, 0, dos_hdr, e_csum); + READ_WORD(file, 0, dos_hdr, e_ip); + READ_WORD(file, 0, dos_hdr, e_cs); + READ_WORD(file, 0, dos_hdr, e_lfarlc); + READ_WORD(file, 0, dos_hdr, e_ovno); + READ_WORD(file, 0, dos_hdr, e_res[0]); + READ_WORD(file, 0, dos_hdr, e_res[1]); + READ_WORD(file, 0, dos_hdr, e_res[2]); + READ_WORD(file, 0, dos_hdr, 
e_res[3]); + READ_WORD(file, 0, dos_hdr, e_oemid); + READ_WORD(file, 0, dos_hdr, e_oeminfo); + READ_WORD(file, 0, dos_hdr, e_res2[0]); + READ_WORD(file, 0, dos_hdr, e_res2[1]); + READ_WORD(file, 0, dos_hdr, e_res2[2]); + READ_WORD(file, 0, dos_hdr, e_res2[3]); + READ_WORD(file, 0, dos_hdr, e_res2[4]); + READ_WORD(file, 0, dos_hdr, e_res2[5]); + READ_WORD(file, 0, dos_hdr, e_res2[6]); + READ_WORD(file, 0, dos_hdr, e_res2[7]); + READ_WORD(file, 0, dos_hdr, e_res2[8]); + READ_WORD(file, 0, dos_hdr, e_res2[9]); + READ_DWORD(file, 0, dos_hdr, e_lfanew); + + return true; +} + bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) { if (file == nullptr) { return false; } - // start by reading MZ - std::uint16_t tmp = 0; - std::uint32_t curOffset = 0; - if (!readWord(file, curOffset, tmp)) { - PE_ERR(PEERR_READ); - return false; - } - if (tmp != MZ_MAGIC) { + // read the DOS header + readDosHeader(file, p.dos); + + if (p.dos.e_magic != MZ_MAGIC) { PE_ERR(PEERR_MAGIC); return false; } - // read the offset to the NT headers - std::uint32_t offset; - if (!readDword(file, _offset(dos_header, e_lfanew), offset)) { - PE_ERR(PEERR_READ); - return false; + // get the offset to the NT headers + std::uint32_t offset = p.dos.e_lfanew; + std::uint32_t curOffset = offset; + + // read rich header + std::uint32_t dword; + std::uint32_t rich_end_signature_offset = 0; + std::uint32_t xor_key; + bool found_rich = false; + + // Start reading from RICH_OFFSET (0x80), a known Rich header offset. + // Note: 0x80 is based on anecdotal evidence. + // + // Iterate over the DWORDs, hence why i increments 4 bytes at a time. + for (std::uint32_t i = RICH_OFFSET; i < offset; i += 4) { + if (!readDword(file, i, dword)) { + PE_ERR(PEERR_READ); + return false; + } + + // Found the trailing Rich signature + if (dword == RICH_MAGIC_END) { + found_rich = true; + rich_end_signature_offset = i; + break; + } + } + + if (found_rich) { + // Get the XOR decryption key. 
It is the DWORD immediately + // after the Rich signature. + if (!readDword(file, rich_end_signature_offset + 4, xor_key)) { + PE_ERR(PEERR_READ); + return false; + } + + // Split the Rich header out into its own buffer + bounded_buffer *richBuf = + splitBuffer(file, 0x80, rich_end_signature_offset + 4); + if (richBuf == nullptr) { + return false; + } + + readRichHeader(richBuf, xor_key, p.rich); + if (richBuf != nullptr) { + deleteBuffer(richBuf); + } + + // Split the DOS header into a separate buffer which + // starts at offset 0 and has length 0x80 + bounded_buffer *dosBuf = splitBuffer(file, 0, RICH_OFFSET); + if (dosBuf == nullptr) { + return false; + } + // Calculate checksum + p.rich.Checksum = calculateRichChecksum(dosBuf, p); + if (p.rich.Checksum == p.rich.DecryptionKey) { + p.rich.isValid = true; + } else { + p.rich.isValid = false; + } + if (dosBuf != nullptr) { + deleteBuffer(dosBuf); + } + + // Rich header not present + } else { + p.rich.isPresent = false; } - curOffset += offset; // now, we can read out the fields of the NT headers bounded_buffer *ntBuf = splitBuffer(file, curOffset, file->bufLen); @@ -837,12 +1440,12 @@ bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) { std::uint32_t rem_size; if (p.nt.OptionalMagic == NT_OPTIONAL_32_MAGIC) { // signature + file_header + optional_header_32 - rem_size = - sizeof(std::uint32_t) + sizeof(file_header) + sizeof(optional_header_32); + rem_size = sizeof(std::uint32_t) + sizeof(file_header) + + sizeof(optional_header_32); } else if (p.nt.OptionalMagic == NT_OPTIONAL_64_MAGIC) { // signature + file_header + optional_header_64 - rem_size = - sizeof(std::uint32_t) + sizeof(file_header) + sizeof(optional_header_64); + rem_size = sizeof(std::uint32_t) + sizeof(file_header) + + sizeof(optional_header_64); } else { PE_ERR(PEERR_MAGIC); deleteBuffer(ntBuf); @@ -887,7 +1490,7 @@ bool getExports(parsed_pe *p) { // get the name of this module std::uint32_t nameRva; if (!readDword(s.sectionData, 
- rvaofft + _offset(export_dir_table, NameRVA), + rvaofft + offsetof(export_dir_table, NameRVA), nameRva)) { return false; } @@ -915,7 +1518,7 @@ bool getExports(parsed_pe *p) { // now, get all the named export symbols std::uint32_t numNames; if (!readDword(s.sectionData, - rvaofft + _offset(export_dir_table, NumberOfNamePointers), + rvaofft + offsetof(export_dir_table, NumberOfNamePointers), numNames)) { return false; } @@ -924,7 +1527,7 @@ bool getExports(parsed_pe *p) { // get the names section std::uint32_t namesRVA; if (!readDword(s.sectionData, - rvaofft + _offset(export_dir_table, NamePointerRVA), + rvaofft + offsetof(export_dir_table, NamePointerRVA), namesRVA)) { return false; } @@ -949,7 +1552,8 @@ bool getExports(parsed_pe *p) { // get the EAT section std::uint32_t eatRVA; if (!readDword(s.sectionData, - rvaofft + _offset(export_dir_table, ExportAddressTableRVA), + rvaofft + + offsetof(export_dir_table, ExportAddressTableRVA), eatRVA)) { return false; } @@ -973,7 +1577,7 @@ bool getExports(parsed_pe *p) { // get the ordinal base std::uint32_t ordinalBase; if (!readDword(s.sectionData, - rvaofft + _offset(export_dir_table, OrdinalBase), + rvaofft + offsetof(export_dir_table, OrdinalBase), ordinalBase)) { return false; } @@ -981,7 +1585,7 @@ bool getExports(parsed_pe *p) { // get the ordinal table std::uint32_t ordinalTableRVA; if (!readDword(s.sectionData, - rvaofft + _offset(export_dir_table, OrdinalTableRVA), + rvaofft + offsetof(export_dir_table, OrdinalTableRVA), ordinalTableRVA)) { return false; } @@ -1124,13 +1728,13 @@ bool getRelocations(parsed_pe *p) { std::uint32_t blockSize; if (!readDword(d.sectionData, - rvaofft + _offset(reloc_block, PageRVA), + rvaofft + offsetof(reloc_block, PageRVA), pageRva)) { return false; } if (!readDword(d.sectionData, - rvaofft + _offset(reloc_block, BlockSize), + rvaofft + offsetof(reloc_block, BlockSize), blockSize)) { return false; } @@ -1374,8 +1978,8 @@ bool getImports(parsed_pe *p) { ent.moduleName = modName; 
p->internal->imports.push_back(ent); } else { - std::string symName = - "ORDINAL_" + modName + "_" + to_string(oval, std::dec); + std::string symName = "ORDINAL_" + modName + "_" + + to_string(oval, std::dec); importent ent; @@ -1424,7 +2028,8 @@ bool getSymbolTable(parsed_pe *p) { std::uint32_t offset = p->peHeader.nt.FileHeader.PointerToSymbolTable; - for (std::uint32_t i = 0; i < p->peHeader.nt.FileHeader.NumberOfSymbols; i++) { + for (std::uint32_t i = 0; i < p->peHeader.nt.FileHeader.NumberOfSymbols; + i++) { symbol sym; // Read name @@ -1452,7 +2057,8 @@ bool getSymbolTable(parsed_pe *p) { strOffset += sizeof(std::uint8_t); } } else { - for (std::uint8_t n = 0; n < NT_SHORT_NAME_LEN && sym.name.shortName[n] != 0; + for (std::uint8_t n = 0; + n < NT_SHORT_NAME_LEN && sym.name.shortName[n] != 0; n++) { sym.strName.push_back(static_cast(sym.name.shortName[n])); } @@ -1712,31 +2318,36 @@ bool getSymbolTable(parsed_pe *p) { } } else { -// std::ios::fmtflags originalStreamFlags(std::cerr.flags()); +#ifdef PEPARSE_LIBRARY_WARNINGS + std::ios::fmtflags originalStreamFlags(std::cerr.flags()); -// auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass); -// if (storageClassName == nullptr) { -// std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex -// << static_cast(sym.storageClass) -// << " at offset 0x" << std::hex << offset << "\n"; -// } else { -// std::cerr << "Warning: Skipping auxiliary symbol of type " -// << storageClassName << " at offset 0x" << std::hex << offset -// << "\n"; -// } + auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass); + if (storageClassName == nullptr) { + std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex + << static_cast(sym.storageClass) + << " at offset 0x" << std::hex << offset << "\n"; + } else { -// std::cerr.flags(originalStreamFlags); + std::cerr << "Warning: Skipping auxiliary symbol of type " + << storageClassName << " at offset 0x" << std::hex << 
offset + << "\n"; + } + + std::cerr.flags(originalStreamFlags); +#endif offset = nextSymbolOffset; } if (offset != nextSymbolOffset) { -// std::ios::fmtflags originalStreamFlags(std::cerr.flags()); +#ifdef PEPARSE_LIBRARY_WARNINGS + std::ios::fmtflags originalStreamFlags(std::cerr.flags()); -// std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex -// << offset << ", expected: 0x" << std::hex << nextSymbolOffset -// << ")\n"; + std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex + << offset << ", expected: 0x" << std::hex << nextSymbolOffset + << ")\n"; -// std::cerr.flags(originalStreamFlags); + std::cerr.flags(originalStreamFlags); +#endif offset = nextSymbolOffset; } } @@ -1744,7 +2355,7 @@ bool getSymbolTable(parsed_pe *p) { return true; } -parsed_pe *ParsePEFromFile(const char *filePath) { +parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer) { // First, create a new parsed_pe structure // We pass std::nothrow parameter to new so in case of failure it returns // nullptr instead of throwing exception std::bad_alloc. 
@@ -1756,13 +2367,7 @@ parsed_pe *ParsePEFromFile(const char *filePath) { } // Make a new buffer object to hold just our file data - p->fileBuffer = readFileToFileBuffer(filePath); - - if (p->fileBuffer == nullptr) { - delete p; - // err is set by readFileToFileBuffer - return nullptr; - } + p->fileBuffer = buffer; p->internal = new (std::nothrow) parsed_pe_internal(); @@ -1838,6 +2443,28 @@ parsed_pe *ParsePEFromFile(const char *filePath) { return p; } +parsed_pe *ParsePEFromFile(const char *filePath) { + auto buffer = readFileToFileBuffer(filePath); + + if (buffer == nullptr) { + // err is set by readFileToFileBuffer + return nullptr; + } + + return ParsePEFromBuffer(buffer); +} + +parsed_pe *ParsePEFromPointer(std::uint8_t *ptr, std::uint32_t sz) { + auto buffer = makeBufferFromPointer(ptr, sz); + + if (buffer == nullptr) { + // err is set by makeBufferFromPointer + return nullptr; + } + + return ParsePEFromBuffer(buffer); +} + void DestructParsedPE(parsed_pe *p) { if (p == nullptr) { return; @@ -1865,7 +2492,7 @@ void DestructParsedPE(parsed_pe *p) { void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) { std::vector &l = pe->internal->imports; - for (importent i : l) { + for (importent &i : l) { if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) { break; } @@ -1878,7 +2505,7 @@ void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) { void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) { std::vector &l = pe->internal->relocs; - for (reloc r : l) { + for (reloc &r : l) { if (cb(cbd, r.shiftedAddr, r.type) != 0) { break; } @@ -1891,7 +2518,7 @@ void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) { void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) { std::vector &l = pe->internal->symbols; - for (symbol s : l) { + for (symbol &s : l) { if (cb(cbd, s.strName, s.value, @@ -1910,7 +2537,7 @@ void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) { void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) { std::vector &l = 
pe->internal->exports; - for (exportent i : l) { + for (exportent &i : l) { if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) { break; } @@ -1923,7 +2550,7 @@ void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) { void IterSec(parsed_pe *pe, iterSec cb, void *cbd) { parsed_pe_internal *pint = pe->internal; - for (section s : pint->secs) { + for (section &s : pint->secs) { if (cb(cbd, s.sectionBase, s.sectionName, s.sec, s.sectionData) != 0) { break; } @@ -2037,4 +2664,68 @@ const char *GetSubsystemAsString(parsed_pe *pe) { } } +bool GetDataDirectoryEntry(parsed_pe *pe, + data_directory_kind dirnum, + std::vector &raw_entry) { + raw_entry.clear(); + + if (pe == nullptr) { + PE_ERR(PEERR_NONE); + return false; + } + + data_directory dir; + VA addr; + if (pe->peHeader.nt.OptionalMagic == NT_OPTIONAL_32_MAGIC) { + dir = pe->peHeader.nt.OptionalHeader.DataDirectory[dirnum]; + addr = dir.VirtualAddress + pe->peHeader.nt.OptionalHeader.ImageBase; + } else if (pe->peHeader.nt.OptionalMagic == NT_OPTIONAL_64_MAGIC) { + dir = pe->peHeader.nt.OptionalHeader64.DataDirectory[dirnum]; + addr = dir.VirtualAddress + pe->peHeader.nt.OptionalHeader64.ImageBase; + } else { + PE_ERR(PEERR_MAGIC); + return false; + } + + if (dir.Size <= 0) { + PE_ERR(PEERR_SIZE); + return false; + } + + /* NOTE(ww): DIR_SECURITY is an annoying special case: its contents + * are never mapped into memory, so its "RVA" is actually a direct + * file offset. 
+ * See: + * https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only + */ + if (dirnum == DIR_SECURITY) { + auto *buf = splitBuffer( + pe->fileBuffer, dir.VirtualAddress, dir.VirtualAddress + dir.Size); + if (buf == nullptr) { + PE_ERR(PEERR_SIZE); + return false; + } + + raw_entry.assign(buf->buf, buf->buf + buf->bufLen); + deleteBuffer(buf); + } else { + section sec; + if (!getSecForVA(pe->internal->secs, addr, sec)) { + PE_ERR(PEERR_SECTVA); + return false; + } + + auto off = static_cast(addr - sec.sectionBase); + if (off + dir.Size >= sec.sectionData->bufLen) { + PE_ERR(PEERR_SIZE); + return false; + } + + raw_entry.assign(sec.sectionData->buf + off, + sec.sectionData->buf + off + dir.Size); + } + + return true; +} + } // namespace peparse diff --git a/pe-parser-library/src/unicode_codecvt.cpp b/pe-parser-library/src/unicode_codecvt.cpp new file mode 100644 index 0000000..22ea1d4 --- /dev/null +++ b/pe-parser-library/src/unicode_codecvt.cpp @@ -0,0 +1,36 @@ +/* +The MIT License (MIT) + +Copyright (c) 2019 Trail of Bits, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +namespace peparse { +// See +// https://stackoverflow.com/questions/38688417/utf-conversion-functions-in-c11 +std::string from_utf16(const UCharString &u) { + std::wstring_convert, char16_t> convert; + return convert.to_bytes(u); +} +} // namespace peparse diff --git a/pe-parser-library/src/unicode_winapi.cpp b/pe-parser-library/src/unicode_winapi.cpp new file mode 100644 index 0000000..7518a1d --- /dev/null +++ b/pe-parser-library/src/unicode_winapi.cpp @@ -0,0 +1,56 @@ +/* +The MIT License (MIT) + +Copyright (c) 2020 Trail of Bits, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +namespace peparse { +std::string from_utf16(const UCharString &u) { + std::string result; + std::size_t size = WideCharToMultiByte(CP_UTF8, + 0, + u.data(), + static_cast(u.size()), + nullptr, + 0, + nullptr, + nullptr); + + if (size <= 0) { + return result; + } + + result.reserve(size); + WideCharToMultiByte(CP_UTF8, + 0, + u.data(), + static_cast(u.size()), + &result[0], + static_cast(result.capacity()), + nullptr, + nullptr); + + return result; +} +} // namespace peparse diff --git a/pepy/README.md b/pepy/README.md new file mode 100644 index 0000000..2213119 --- /dev/null +++ b/pepy/README.md @@ -0,0 +1,204 @@ +pepy +==== +pepy (pronounced p-pie) is a python binding to the pe-parse parser. + +pepy supports Python versions 3.6 and above. + +The easiest way to use pepy is to install it via pip: + +```bash +$ pip3 install pepy +``` + +## Building + +If you can build pe-parse and have a working python environment (headers and +libraries) you can build pepy. + +1. Build pepy: + * `python3 setup.py build` +2. Install pepy: + * `python3 setup.py install` + +**Building on Windows:** Python 3.x is typically installed as _python.exe_, +**NOT** _python3.exe_. + +## Using + +### Parsed object + +There are a number of objects involved in pepy. The main one is the **parsed** +object. This object is returned by the *parse* method. 
+ +```python +import pepy +p = pepy.parse("/path/to/exe") +``` + +The **parsed** object has a number of methods: + +* `get_entry_point`: Return the entry point address +* `get_machine_as_str`: Return the machine as a human readable string +* `get_subsystem_as_str`: Return the subsystem as a human readable string +* `get_bytes`: Return the first N bytes at a given address +* `get_sections`: Return a list of section objects +* `get_imports`: Return a list of import objects +* `get_exports`: Return a list of export objects +* `get_relocations`: Return a list of relocation objects +* `get_resources`: Return a list of resource objects + +The **parsed** object has a number of attributes: + +* `signature` +* `machine` +* `numberofsections` +* `timedatestamp` +* `numberofsymbols` +* `characteristics` +* `magic` +* `majorlinkerver` +* `minorlinkerver` +* `codesize` +* `initdatasize` +* `uninitdatasize` +* `entrypointaddr` +* `baseofcode` +* `baseofdata` +* `imagebase` +* `sectionalignement` +* `filealignment` +* `majorosver` +* `minorosver` +* `win32ver` +* `imagesize` +* `headersize` +* `checksum` +* `subsystem` +* `dllcharacteristics` +* `stackreservesize` +* `stackcommitsize` +* `heapreservesize` +* `heapcommitsize` +* `loaderflags` +* `rvasandsize` + +Example: + +```python +import time +import pepy + +p = pepy.parse("/path/to/exe") +print("Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))) +ep = p.get_entry_point() +print("Entry point: 0x%x" % ep) +``` + +The `get_sections`, `get_imports`, `get_exports`, `get_relocations` and +`get_resources` methods each return a list of objects. The type of object +depends upon the method called. `get_sections` returns a list of `section` +objects, `get_imports` returns a list of `import` objects, etc. 
+ +### Section Object + +The `section` object has the following attributes: + +* `base` +* `length` +* `virtaddr` +* `virtsize` +* `numrelocs` +* `numlinenums` +* `characteristics` +* `data` + +### Import Object + +The `import` object has the following attributes: + +* `sym` +* `name` +* `addr` + +### Export Object + +The `export` object has the following attributes: + +* `mod` +* `func` +* `addr` + +### Relocation Object + +The `relocation` object has the following attributes: + +* `type` +* `addr` + +### Resource Object + +The `resource` object has the following attributes: + +* `type_str` +* `name_str` +* `lang_str` +* `type` +* `name` +* `lang` +* `codepage` +* `RVA` +* `size` +* `data` + +The `resource` object has the following methods: + +* `type_as_str` + +Resources are stored in a directory structure. The first three levels of the +directory are called `type`, `name` and `lang`. Each of these levels can have +either a pre-defined value or a custom string. The pre-defined values are +stored in the `type`, `name` and `lang` attributes. If a custom string is +found it will be stored in the `type_str`, `name_str` and `lang_str` +attributes. The `type_as_str` method can be used to convert a pre-defined +type value to a string representation.
+ +The following code shows how to iterate through resources: + +```python +import pepy + +from hashlib import md5 +import sys + +p = pepy.parse(sys.argv[1]) +resources = p.get_resources() +print("Resources: (%i)" % len(resources)) +for resource in resources: + print("[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())) + if resource.type_str: + print("\tType string: %s" % resource.type_str) + else: + print("\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())) + if resource.name_str: + print("\tName string: %s" % resource.name_str) + else: + print("\tName: %s" % hex(resource.name)) + if resource.lang_str: + print("\tLang string: %s" % resource.lang_str) + else: + print("\tLang: %s" % hex(resource.lang)) + print("\tCodepage: %s" % hex(resource.codepage)) + print("\tRVA: %s" % hex(resource.RVA)) + print("\tSize: %s" % hex(resource.size)) +``` + +Note that some binaries (particularly packed) may have corrupt resource entries. +In these cases you may find that `len(resource.data)` is 0 but `resource.size` is +greater than 0. The `size` attribute is the size of the data as declared by the +resource data entry. + +## Authors + +pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com). + +pepy was written by Wesley Shields (wxs@atarininja.org). diff --git a/python/pepy.cpp b/pepy/pepy.cpp similarity index 93% rename from python/pepy.cpp rename to pepy/pepy.cpp index ce11d57..c22a62e 100644 --- a/python/pepy.cpp +++ b/pepy/pepy.cpp @@ -26,31 +26,14 @@ */ #include -#include +#include #include using namespace peparse; -#define PEPY_VERSION "0.3" - -/* - * Add some definition for compatibility between python2 and python3 +/* NOTE(ww): These don't necessarily have to be the same, but currently are. 
*/ -#if PY_MAJOR_VERSION >= 3 -#define PyString_FromString PyUnicode_FromString -#endif - -/* - * Some macro only available after python 2.6 - * Needed for compatibility with python3 - */ -#ifndef PyVarObject_HEAD_INIT -#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, -#endif - -#ifndef Py_TYPE -#define Py_TYPE(_ob_) (((PyObject *) (_ob_))->ob_type) -#endif +#define PEPY_VERSION PEPARSE_VERSION /* These are used to across multiple objects. */ #define PEPY_OBJECT_GET(OBJ, ATTR) \ @@ -154,9 +137,9 @@ static void pepy_import_dealloc(pepy_import *self) { Py_TYPE(self)->tp_free((PyObject *) self); } -PEPY_OBJECT_GET(import, name) -PEPY_OBJECT_GET(import, sym) -PEPY_OBJECT_GET(import, addr) +PEPY_OBJECT_GET(import, name); +PEPY_OBJECT_GET(import, sym); +PEPY_OBJECT_GET(import, addr); static PyGetSetDef pepy_import_getseters[] = { OBJECTGETTER(import, name, "Name"), @@ -228,9 +211,9 @@ static void pepy_export_dealloc(pepy_export *self) { Py_TYPE(self)->tp_free((PyObject *) self); } -PEPY_OBJECT_GET(export, mod) -PEPY_OBJECT_GET(export, func) -PEPY_OBJECT_GET(export, addr) +PEPY_OBJECT_GET(export, mod); +PEPY_OBJECT_GET(export, func); +PEPY_OBJECT_GET(export, addr); static PyGetSetDef pepy_export_getseters[] = { OBJECTGETTER(export, mod, "Module"), @@ -302,8 +285,8 @@ static void pepy_relocation_dealloc(pepy_relocation *self) { Py_TYPE(self)->tp_free((PyObject *) self); } -PEPY_OBJECT_GET(relocation, type) -PEPY_OBJECT_GET(relocation, addr) +PEPY_OBJECT_GET(relocation, type); +PEPY_OBJECT_GET(relocation, addr); static PyGetSetDef pepy_relocation_getseters[] = { OBJECTGETTER(relocation, type, "Type"), @@ -390,15 +373,15 @@ static void pepy_section_dealloc(pepy_section *self) { Py_TYPE(self)->tp_free((PyObject *) self); } -PEPY_OBJECT_GET(section, name) -PEPY_OBJECT_GET(section, base) -PEPY_OBJECT_GET(section, length) -PEPY_OBJECT_GET(section, virtaddr) -PEPY_OBJECT_GET(section, virtsize) -PEPY_OBJECT_GET(section, numrelocs) -PEPY_OBJECT_GET(section, 
numlinenums) -PEPY_OBJECT_GET(section, characteristics) -PEPY_OBJECT_GET(section, data) +PEPY_OBJECT_GET(section, name); +PEPY_OBJECT_GET(section, base); +PEPY_OBJECT_GET(section, length); +PEPY_OBJECT_GET(section, virtaddr); +PEPY_OBJECT_GET(section, virtsize); +PEPY_OBJECT_GET(section, numrelocs); +PEPY_OBJECT_GET(section, numlinenums); +PEPY_OBJECT_GET(section, characteristics); +PEPY_OBJECT_GET(section, data); static PyGetSetDef pepy_section_getseters[] = { OBJECTGETTER(section, name, "Name"), @@ -495,16 +478,16 @@ static void pepy_resource_dealloc(pepy_resource *self) { Py_TYPE(self)->tp_free((PyObject *) self); } -PEPY_OBJECT_GET(resource, type_str) -PEPY_OBJECT_GET(resource, name_str) -PEPY_OBJECT_GET(resource, lang_str) -PEPY_OBJECT_GET(resource, type) -PEPY_OBJECT_GET(resource, name) -PEPY_OBJECT_GET(resource, lang) -PEPY_OBJECT_GET(resource, codepage) -PEPY_OBJECT_GET(resource, RVA) -PEPY_OBJECT_GET(resource, size) -PEPY_OBJECT_GET(resource, data) +PEPY_OBJECT_GET(resource, type_str); +PEPY_OBJECT_GET(resource, name_str); +PEPY_OBJECT_GET(resource, lang_str); +PEPY_OBJECT_GET(resource, type); +PEPY_OBJECT_GET(resource, name); +PEPY_OBJECT_GET(resource, lang); +PEPY_OBJECT_GET(resource, codepage); +PEPY_OBJECT_GET(resource, RVA); +PEPY_OBJECT_GET(resource, size); +PEPY_OBJECT_GET(resource, data); static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) { PyObject *ret; @@ -587,7 +570,7 @@ static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) { break; } - ret = PyString_FromString(str); + ret = PyUnicode_FromString(str); if (!ret) { PyErr_SetString(pepy_error, "Unable to create return string."); return NULL; @@ -704,7 +687,8 @@ static PyObject *pepy_parsed_get_entry_point(PyObject *self, PyObject *args) { return ret; } -static PyObject *pepy_parsed_get_machine_as_str(PyObject *self, PyObject *args) { +static PyObject *pepy_parsed_get_machine_as_str(PyObject *self, + PyObject *args) { PyObject *ret; const char *str; @@ 
-712,7 +696,7 @@ static PyObject *pepy_parsed_get_machine_as_str(PyObject *self, PyObject *args) if (!str) Py_RETURN_NONE; - ret = PyString_FromString(str); + ret = PyUnicode_FromString(str); if (!ret) { PyErr_SetString(pepy_error, "Unable to create return string."); return NULL; @@ -721,7 +705,8 @@ static PyObject *pepy_parsed_get_machine_as_str(PyObject *self, PyObject *args) return ret; } -static PyObject *pepy_parsed_get_subsystem_as_str(PyObject *self, PyObject *args) { +static PyObject *pepy_parsed_get_subsystem_as_str(PyObject *self, + PyObject *args) { PyObject *ret; const char *str; @@ -729,7 +714,7 @@ static PyObject *pepy_parsed_get_subsystem_as_str(PyObject *self, PyObject *args if (!str) Py_RETURN_NONE; - ret = PyString_FromString(str); + ret = PyUnicode_FromString(str); if (!ret) { PyErr_SetString(pepy_error, "Unable to create return string."); return NULL; @@ -802,10 +787,10 @@ static PyObject *pepy_data_converter(bounded_buffer *data) { } int section_callback(void *cbd, - VA base, - std::string &name, - image_section_header s, - bounded_buffer *data) { + const VA &base, + const std::string &name, + const image_section_header &s, + const bounded_buffer *data) { uint32_t buflen; PyObject *sect; PyObject *tuple; @@ -876,7 +861,7 @@ static PyObject *pepy_parsed_get_sections(PyObject *self, PyObject *args) { return ret; } -int resource_callback(void *cbd, resource r) { +int resource_callback(void *cbd, const resource &r) { PyObject *rsrc; PyObject *tuple; PyObject *list = (PyObject *) cbd; @@ -885,15 +870,6 @@ int resource_callback(void *cbd, resource r) { * The tuple item order is important here. It is passed into the * section type initialization and parsed there. 
*/ -#if PY_MAJOR_VERSION >= 3 - tuple = Py_BuildValue("u#u#u#IIIIIIO&", - r.type_str.c_str(), - r.type_str.length() / 2, - r.name_str.c_str(), - r.name_str.length() / 2, - r.lang_str.c_str(), - r.lang_str.length() / 2, -#else tuple = Py_BuildValue("s#s#s#IIIIIIO&", r.type_str.c_str(), r.type_str.length(), @@ -901,7 +877,6 @@ int resource_callback(void *cbd, resource r) { r.name_str.length(), r.lang_str.c_str(), r.lang_str.length(), -#endif r.type, r.name, r.lang, @@ -946,7 +921,7 @@ static PyObject *pepy_parsed_get_resources(PyObject *self, PyObject *args) { } int import_callback(void *cbd, - VA addr, + const VA &addr, const std::string &name, const std::string &sym) { PyObject *imp; @@ -993,7 +968,10 @@ static PyObject *pepy_parsed_get_imports(PyObject *self, PyObject *args) { return ret; } -int export_callback(void *cbd, VA addr, std::string &mod, std::string &func) { +int export_callback(void *cbd, + const VA &addr, + const std::string &mod, + const std::string &func) { PyObject *exp; PyObject *tuple; PyObject *list = (PyObject *) cbd; @@ -1042,7 +1020,7 @@ static PyObject *pepy_parsed_get_exports(PyObject *self, PyObject *args) { return ret; } -int reloc_callback(void *cbd, VA addr, reloc_type type) { +int reloc_callback(void *cbd, const VA &addr, const reloc_type &type) { PyObject *reloc; PyObject *tuple; PyObject *list = (PyObject *) cbd; @@ -1089,21 +1067,20 @@ static PyObject *pepy_parsed_get_relocations(PyObject *self, PyObject *args) { #define PEPY_PARSED_GET(ATTR, VAL) \ static PyObject *pepy_parsed_get_##ATTR(PyObject *self, void *closure) { \ - PyObject *ret = \ - PyLong_FromUnsignedLongLong( \ - ((pepy_parsed *) self)->pe->peHeader.nt.VAL); \ + PyObject *ret = PyLong_FromUnsignedLongLong( \ + ((pepy_parsed *) self)->pe->peHeader.nt.VAL); \ if (!ret) \ PyErr_SetString(PyExc_AttributeError, "Error getting attribute."); \ return ret; \ } -PEPY_PARSED_GET(signature, Signature) -PEPY_PARSED_GET(machine, FileHeader.Machine) 
-PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections) -PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp) -PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols) -PEPY_PARSED_GET(characteristics, FileHeader.Characteristics) -PEPY_PARSED_GET(magic, OptionalMagic) +PEPY_PARSED_GET(signature, Signature); +PEPY_PARSED_GET(machine, FileHeader.Machine); +PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections); +PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp); +PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols); +PEPY_PARSED_GET(characteristics, FileHeader.Characteristics); +PEPY_PARSED_GET(magic, OptionalMagic); /* * This is used to get things from the optional header, which can be either @@ -1134,8 +1111,8 @@ PEPY_PARSED_GET(magic, OptionalMagic) return ret; \ } -PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion) -PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion) +PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion); +PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion); PEPY_PARSED_GET_OPTIONAL(codesize, SizeOfCode); PEPY_PARSED_GET_OPTIONAL(initdatasize, SizeOfInitializedData); PEPY_PARSED_GET_OPTIONAL(uninitdatasize, SizeOfUninitializedData); @@ -1337,7 +1314,7 @@ static PyObject *pepy_parse(PyObject *self, PyObject *args) { static PyMethodDef pepy_methods[] = { {"parse", pepy_parse, METH_VARARGS, "Parse PE from file."}, {NULL}}; -static PyObject *pepi_module_init(void) { +PyMODINIT_FUNC PyInit_pepy(void) { PyObject *m; if (PyType_Ready(&pepy_parsed_type) < 0 || @@ -1348,7 +1325,6 @@ static PyObject *pepi_module_init(void) { PyType_Ready(&pepy_resource_type) < 0) return NULL; -#if PY_MAJOR_VERSION >= 3 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "pepy", @@ -1360,13 +1336,8 @@ static PyObject *pepi_module_init(void) { NULL, NULL, }; -#endif -#if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); -#else - m = Py_InitModule3("pepy", pepy_methods, "Python interface 
to pe-parse."); -#endif if (!m) return NULL; @@ -1393,6 +1364,8 @@ static PyObject *pepi_module_init(void) { PyModule_AddObject(m, "pepy_resource", (PyObject *) &pepy_resource_type); PyModule_AddStringMacro(m, PEPY_VERSION); + PyModule_AddStringMacro(m, PEPARSE_VERSION); + PyModule_AddStringConstant(m, "__version__", PEPY_VERSION); PyModule_AddIntMacro(m, MZ_MAGIC); PyModule_AddIntMacro(m, NT_MAGIC); @@ -1456,13 +1429,3 @@ static PyObject *pepi_module_init(void) { return m; } - -#if PY_MAJOR_VERSION >= 3 -PyMODINIT_FUNC PyInit_pepy(void) { - return pepi_module_init(); -} -#else -PyMODINIT_FUNC initpepy(void) { - pepi_module_init(); -} -#endif diff --git a/python/README.md b/python/README.md deleted file mode 100644 index 2be2c02..0000000 --- a/python/README.md +++ /dev/null @@ -1,217 +0,0 @@ -pepy -==== -pepy (pronounced p-pie) is a python binding to the pe-parse parser. - -Building -======== -If you can build pe-parse and have a working python environment (headers and -libraries) you can build pepy. - -Python 2.7 ----------- -1. Build pepy: - * python setup.py build -2. Install pepy: - * python setup.py install - -**Building on Windows:** If you get a build error of 'Unable to find -vcvarsall.bat', you must set the `VS90COMNTOOLS` environment variable prior -to the appropriate path as per -[this SO article](http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat): -> While running setup.py for package installations, Python 2.7 searches for an -> installed Visual Studio 2008. You can trick Python to use a newer Visual -> Studio by setting the correct path in VS90COMNTOOLS environment variable -> before calling setup.py. 
-> -> Execute the following command based on the version of Visual Studio installed: -> * Visual Studio 2010 (VS10): `SET VS90COMNTOOLS=%VS100COMNTOOLS%` -> * Visual Studio 2012 (VS11): `SET VS90COMNTOOLS=%VS110COMNTOOLS%` -> * Visual Studio 2013 (VS12): `SET VS90COMNTOOLS=%VS120COMNTOOLS%` -> * Visual Studio 2015/2017 (VS14): `SET VS90COMNTOOLS=%VS140COMNTOOLS%` - -Python 3.x ----------- -1. Build pepy: - * python3 setup.py build -2. Install pepy: - * python3 setup.py install - -**Building on Windows:** Python 3.x is typically installed as _python.exe_ -**NOT** _python3.exe_. - -Using -===== -Parsed object -------------- -There are a number of objects involved in pepy. The main one is the **parsed** -object. This object is returned by the *parse* method. - -``` -import pepy -p = pepy.parse("/path/to/exe") -``` - -The **parsed** object has a number of methods: - -* get_entry_point: Return the entry point address -* get_machine_as_str: Return the machine as a human readable string -* get_subsystem_as_str: Return the subsystem as a human readable string -* get_bytes: Return the first N bytes at a given address -* get_sections: Return a list of section objects -* get_imports: Return a list of import objects -* get_exports: Return a list of export objects -* get_relocations: Return a list of relocation objects -* get_resources: Return a list of resource objects - -The **parsed** object has a number of attributes: - -* signature -* machine -* numberofsections -* timedatestamp -* numberofsymbols -* characteristics -* magic -* majorlinkerver -* minorlinkerver -* codesize -* initdatasize -* uninitdatasize -* entrypointaddr -* baseofcode -* baseofdata -* imagebase -* sectionalignement -* filealignment -* majorosver -* minorosver -* win32ver -* imagesize -* headersize -* checksum -* subsystem -* dllcharacteristics -* stackreservesize -* stackcommitsize -* heapreservesize -* heapcommitsize -* loaderflags -* rvasandsize - -Example: -``` -import time -import pepy - -p = 
pepy.parse("/path/to/exe") -print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)) -ep = p.get_entry_point() -print "Entry point: 0x%x" % ep -``` - -The *get_sections*, *get_imports*, *get_exports*, *get_relocations* and -*get_resources* methods each return a list of objects. The type of object -depends upon the method called. *get_sections* returns a list of **section** -objects, *get_imports* returns a list of **import** objects, etc. - -Section Object --------------- -The **section** object has the following attributes: - -* base -* length -* virtaddr -* virtsize -* numrelocs -* numlinenums -* characteristics -* data - -Import Object -------------- -The **import** object has the following attributes: - -* sym -* name -* addr - -Export Object -------------- -The **export** object has the following attributes: - -* mod -* func -* addr - -Relocation Object ------------------ -The **relocation** object has the following attributes: - -* type -* addr - -Resource Object ---------------- -The **resource** object has the following attributes: - -* type_str -* name_str -* lang_str -* type -* name -* lang -* codepage -* RVA -* size -* data - -The **resource** object has the following methods: - -* type_as_str - -Resources are stored in a directory structure. The first three levels of the -are called **type**, **name** and **lang**. Each of these levels can have -either a pre-defined value or a custom string. The pre-defined values are -stored in the *type*, *name* and *lang* attributes. If a custom string is -found it will be stored in the *type_str*, *name_str* and *lang_str* -attributes. The *type_as_str* method can be used to convert a pre-defined -type value to a string representation. 
- -The following code shows how to iterate through resources: - -``` -import pepy - -from hashlib import md5 - -p = pepy.parse(sys.argv[1]) -resources = p.get_resources() -print "Resources: (%i)" % len(resources) -for resource in resources: - print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest()) - if resource.type_str: - print "\tType string: %s" % resource.type_str - else: - print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str()) - if resource.name_str: - print "\tName string: %s" % resource.name_str - else: - print "\tName: %s" % hex(resource.name) - if resource.lang_str: - print "\tLang string: %s" % resource.lang_str - else: - print "\tLang: %s" % hex(resource.lang) - print "\tCodepage: %s" % hex(resource.codepage) - print "\tRVA: %s" % hex(resource.RVA) - print "\tSize: %s" % hex(resource.size) -``` - -Note that some binaries (particularly packed) may have corrupt resource entries. -In these cases you may find that len(resource.data) is 0 but resource.size is -greater than 0. The *size* attribute is the size of the data as declared by the -resource data entry. - -Authors -======= -pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com) -pepy was written by Wesley Shields (wxs@atarininja.org) diff --git a/python/setup.py b/python/setup.py deleted file mode 100644 index 1e607c3..0000000 --- a/python/setup.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2013, Wesley Shields . All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. - -from distutils.core import setup, Extension -import os -import sys -import platform - -here = os.path.abspath(os.path.dirname(__file__)) - -SOURCE_FILES = [os.path.join(here, 'pepy.cpp'), - os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'parse.cpp')), - os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'buffer.cpp'))] - -if platform.system() == 'Windows': - INCLUDE_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'include')), - os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include')), - 'C:\\usr\\include'] - LIBRARY_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'libs')), - 'C:\\usr\\lib'] - COMPILE_ARGS = ["/EHsc"] -else: - INCLUDE_DIRS = ['/usr/local/include', - '/opt/local/include', - '/usr/include', - os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include'))] - LIBRARY_DIRS = ['/usr/lib', - '/usr/local/lib'] - COMPILE_ARGS = ["-std=c++11", "-g", "-O0"] # Debug only - -extension_mod = Extension('pepy', - sources = SOURCE_FILES, - extra_compile_args = COMPILE_ARGS, - language='c++', - include_dirs = INCLUDE_DIRS, - library_dirs = LIBRARY_DIRS) - - -setup (name = 'pepy', - version = 
'0.1', - description = 'python bindings for pe-parse', - author = 'Wesley Shields', - author_email = 'wxs@atarininja.org', - license = 'BSD', - long_description = 'Python bindings for pe-parse', - ext_modules = [extension_mod]) diff --git a/python/test.py b/python/test.py deleted file mode 100755 index a6f3db5..0000000 --- a/python/test.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python - -import sys -import time -import pepy -import binascii - -from hashlib import md5 - -try: - p = pepy.parse(sys.argv[1]) -except pepy.error as e: - print e - sys.exit(1) - -print "Magic: %s" % hex(p.magic) -print "Signature: %s" % hex(p.signature) -print "Machine: %s (%s)" % (hex(p.machine), p.get_machine_as_str()) -print "Number of sections: %s" % p.numberofsections -print "Number of symbols: %s" % p.numberofsymbols -print "Characteristics: %s" % hex(p.characteristics) -print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)) -print "Major linker version: %s" % hex(p.majorlinkerver) -print "Minor linker version: %s" % hex(p.minorlinkerver) -print "Size of code: %s" % hex(p.codesize) -print "Size of initialized data: %s" % hex(p.initdatasize) -print "Size of uninitialized data: %s" % hex(p.uninitdatasize) -print "Address of entry point: %s" % hex(p.entrypointaddr) -print "Base address of code: %s" % hex(p.baseofcode) -try: - print "Base address of data: %s" % hex(p.baseofdata) -except: - # Not available on PE32+, ignore it. 
- pass -print "Image base address: %s" % hex(p.imagebase) -print "Section alignment: %s" % hex(p.sectionalignement) -print "File alignment: %s" % hex(p.filealignment) -print "Major OS version: %s" % hex(p.majorosver) -print "Minor OS version: %s" % hex(p.minorosver) -print "Win32 version: %s" % hex(p.win32ver) -print "Size of image: %s" % hex(p.imagesize) -print "Size of headers: %s" % hex(p.headersize) -print "Checksum: %s" % hex(p.checksum) -print "Subsystem: %s (%s)" % (hex(p.subsystem), p.get_subsystem_as_str()) -print "DLL characteristics: %s" % hex(p.dllcharacteristics) -print "Size of stack reserve: %s" % hex(p.stackreservesize) -print "Size of stack commit: %s" % hex(p.stackcommitsize) -print "Size of heap reserve: %s" % hex(p.heapreservesize) -print "Size of heap commit: %s" % hex(p.heapcommitsize) -print "Loader flags: %s" % hex(p.loaderflags) -print "Number of RVA and sizes: %s" % hex(p.rvasandsize) -ep = p.get_entry_point() -byts = p.get_bytes(ep, 8) -print "Bytes at %s: %s" % (hex(ep), ' '.join(['0x' + binascii.hexlify(b) for b in str(byts)])) -sections = p.get_sections() -print "Sections: (%i)" % len(sections) -for sect in sections: - print "[+] %s" % sect.name - print "\tBase: %s" % hex(sect.base) - print "\tLength: %s" % sect.length - print "\tVirtual address: %s" % hex(sect.virtaddr) - print "\tVirtual size: %i" % sect.virtsize - print "\tNumber of Relocations: %i" % sect.numrelocs - print "\tNumber of Line Numbers: %i" % sect.numlinenums - print "\tCharacteristics: %s" % hex(sect.characteristics) - if sect.length: - print "\tFirst 10 bytes: 0x%s" % binascii.hexlify(sect.data[:10]) - print "\tMD5: %s" % md5(sect.data).hexdigest() -imports = p.get_imports() -print "Imports: (%i)" % len(imports) -for imp in imports: - print "[+] Symbol: %s (%s %s)" % (imp.sym, imp.name, hex(imp.addr)) -exports = p.get_exports() -print "Exports: (%i)" % len(exports) -for exp in exports: - print "[+] Module: %s (%s %s)" % (exp.mod, exp.func, hex(exp.addr)) -relocations 
= p.get_relocations() -print "Relocations: (%i)" % len(relocations) -for reloc in relocations: - print "[+] Type: %s (%s)" % (reloc.type, hex(reloc.addr)) -resources = p.get_resources() -print "Resources: (%i)" % len(resources) -for resource in resources: - print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest()) - if resource.type_str: - print "\tType string: %s" % resource.type_str - else: - print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str()) - if resource.name_str: - print "\tName string: %s" % resource.name_str - else: - print "\tName: %s" % hex(resource.name) - if resource.lang_str: - print "\tLang string: %s" % resource.lang_str - else: - print "\tLang: %s" % hex(resource.lang) - print "\tCodepage: %s" % hex(resource.codepage) - print "\tRVA: %s" % hex(resource.RVA) - print "\tSize: %s" % hex(resource.size) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..039488e --- /dev/null +++ b/setup.py @@ -0,0 +1,101 @@ +# Copyright (c) 2013, Wesley Shields . All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +from setuptools import setup, Extension +import os +import sys +import platform + +here = os.path.dirname(__file__) +pepy = os.path.join(here, "pepy") + + +with open(os.path.join(pepy, "README.md"), encoding="utf-8") as f: + README = f.read() + +with open(os.path.join(here, "VERSION"), encoding="utf-8") as f: + VERSION = f.read().strip() + +SOURCE_FILES = [ + os.path.join(pepy, "pepy.cpp"), + os.path.join(here, "pe-parser-library", "src", "parse.cpp"), + os.path.join(here, "pe-parser-library", "src", "buffer.cpp"), +] + +INCLUDE_DIRS = [] +LIBRARY_DIRS = [] + +if platform.system() == "Windows": + SOURCE_FILES.append( + os.path.join(here, "pe-parser-library", "src", "unicode_winapi.cpp") + ) + INCLUDE_DIRS += [ + os.path.abspath(os.path.join(os.path.dirname(sys.executable), "include")), + os.path.join(here, "pe-parser-library", "include"), + "C:\\usr\\include", + ] + LIBRARY_DIRS += [ + os.path.abspath(os.path.join(os.path.dirname(sys.executable), "libs")), + "C:\\usr\\lib", + ] + COMPILE_ARGS = [ + "/EHsc", + f'/D"PEPARSE_VERSION=\\"{VERSION}\\""', + ] +else: + SOURCE_FILES.append( + os.path.join(here, "pe-parser-library", "src", "unicode_codecvt.cpp") + ) + INCLUDE_DIRS += [ + "/usr/local/include", + "/opt/local/include", + "/usr/include", + os.path.join(here, "pe-parser-library", "include"), + ] + LIBRARY_DIRS += ["/usr/lib", "/usr/local/lib"] + COMPILE_ARGS = ["-std=c++11", f'-DPEPARSE_VERSION="{VERSION}"'] + +extension_mod = Extension( + "pepy", + sources=SOURCE_FILES, +
extra_compile_args=COMPILE_ARGS, + language="c++", + include_dirs=INCLUDE_DIRS, + library_dirs=LIBRARY_DIRS, +) + +setup( + name="pepy", + url="https://github.com/trailofbits/pe-parse", + python_requires=">= 3.6", + version=VERSION, + description="Python bindings for pe-parse", + long_description=README, + long_description_content_type="text/markdown", + author="Wesley Shields", + author_email="wxs@atarininja.org", + license="BSD", + ext_modules=[extension_mod], +) diff --git a/test/assets/example.exe b/test/assets/example.exe new file mode 100644 index 0000000..cbbda0c Binary files /dev/null and b/test/assets/example.exe differ diff --git a/python/test_python3.py b/test/test_pepy.py similarity index 100% rename from python/test_python3.py rename to test/test_pepy.py diff --git a/travis.sh b/travis.sh deleted file mode 100755 index 7be924f..0000000 --- a/travis.sh +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env bash - -main() { - if [ $# -ne 2 ] ; then - printf "Usage:\n\ttravis.sh \n" - return 1 - fi - - local platform_name="$1" - local operation_type="$2" - - if [[ "${platform_name}" != "osx" && "${platform_name}" != "linux" ]] ; then - printf "Invalid platform: ${platform_name}\n" - return 1 - fi - - if [[ "${operation_type}" == "initialize" ]] ; then - "${platform_name}_initialize" - return $? - - elif [[ "$operation_type" == "build" ]] ; then - "${platform_name}_build" - return $? - - else - printf "Invalid operation\n" - return 1 - fi -} - -get_processor_count() { - which nproc > /dev/null - if [ $? -eq 0 ] ; then - nproc - return 0 - fi - - which sysctl > /dev/null - if [ $? -eq 0 ] ; then - sysctl -n hw.ncpu - return 0 - fi - - return 1 -} - -linux_initialize() { - printf "Initializing platform: linux\n" - local log_file=`mktemp` - - printf " > Updating the package database..\n" - sudo apt-get -qq update > "${log_file}" 2>&1 - if [ $? 
-ne 0 ] ; then - printf " x The package database could not be updated\n\n\n" - cat "${log_file}" - return 1 - fi - - printf " > Installing the required packages...\n" - sudo apt-get install -qqy cmake python2.7 python-dev build-essential realpath > "${log_file}" 2>&1 - if [ $? -ne 0 ] ; then - printf " x Could not install the required dependencies\n\n\n" - cat "${log_file}" - return 1 - fi - - printf " > The system has been successfully initialized\n" - return 0 -} - -osx_initialize() { - printf "Initializing platform: macOS\n" - local log_file=`mktemp` - - printf " > Updating the package database..\n" - brew update > "${log_file}" 2>&1 - if [ $? -ne 0 ] ; then - printf " x The package database could not be updated\n\n\n" - cat "${log_file}" - return 1 - fi - - printf " > Installing CMake...\n" - brew install cmake > "${log_file}" 2>&1 - if [ $? -ne 0 ] ; then - printf " x Failed to install CMake\n\n\n" - cat "${log_file}" - fi - - printf " > The system has been successfully initialized\n" - return 0 -} - -common_build() { - printf "Gathering system information...\n" - - which cmake > /dev/null - printf " > CMake version: " - if [ $? -eq 0 ] ; then - cmake --version | head -n 1 - else - printf "not found\n" - fi - - which gcc > /dev/null - printf " > GCC version: " - if [ $? -eq 0 ] ; then - gcc --version | head -n 1 - else - printf "not found\n" - fi - - which clang > /dev/null - printf " > Clang version: " - if [ $? -eq 0 ] ; then - clang --version | head -n 1 - else - printf "not found\n" - fi - - printf "\n" - - printf "Library\n" - if [ ! -d "build" ] ; then - printf " > Creating the build directory...\n" - mkdir "build" - if [ $? -ne 0 ] ; then - printf " x Failed to create the build directory\n" - return 1 - fi - fi - - local log_file=`mktemp` - local processor_count=`get_processor_count` - - printf " > Configuring...\n" - ( cd "build" && cmake .. ) > "$log_file" 2>&1 - if [ $? 
-ne 0 ] ; then - printf " x Configure failed; CMake returned an error.\n\n\n" - cat "$log_file" - return 1 - fi - - printf " > Building...\n" - ( cd "build" && make -j "${processor_count}" ) > "$log_file" 2>&1 - if [ $? -ne 0 ] ; then - printf " x The build has failed.\n\n\n" - cat "$log_file" - return 1 - fi - - printf " > Installing...\n" - sudo touch /usr/lib/test_file > /dev/null 2>&1 - if [ $? -ne 0 ] ; then - printf " x Access denied to /usr/lib; the 'install' step will be skipped\n" - - else - ( cd "build" && sudo make install ) > "$log_file" 2>&1 - if [ $? -ne 0 ] ; then - printf " x Failed to install the library.\n\n\n" - cat "$log_file" - return 1 - fi - fi - - printf "\n" - - printf "pepy\n" - - printf " > Building...\n" - ( cd python && python2 ./setup.py build ) > "$log_file" 2>&1 - if [ $? -ne 0 ] ; then - printf " x Build failed.\n\n\n" - cat "$log_file" - return 1 - fi - - return 0 -} - -linux_build() { - printf "Building platform: linux\n" - - source /etc/*-release - printf "Distribution: ${DISTRIB_DESCRIPTION}\n\n" - - common_build - if [ $? -ne 0 ] ; then - return 1 - fi - - return 0 -} - -osx_build() { - printf "Building platform: macOS\n\n" - - printf "macOS version: " - sw_vers -productVersion - - common_build - if [ $? -ne 0 ] ; then - return 1 - fi - - return 0 -} - -main $@ -exit $? diff --git a/util/release b/util/release new file mode 100755 index 0000000..7dd7952 --- /dev/null +++ b/util/release @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# release: perform the chore work required for a pe-parse/pepy release + +set -eo pipefail + +function installed { + cmd=$(command -v "${1}") + + [[ -n "${cmd}" ]] && [[ -f "${cmd}" ]] + return ${?} +} + +function die { + >&2 echo "Barf: ${*}" + exit 1 +} + +# Fail early if we don't have the expected tools. +installed git || die "Missing dependency: git" + +# Fail early if `git status` reports any untracked changes. 
+[[ -n $(git status -s) ]] && die "Untracked changes in repo" + +# Next, read the version from the VERSION file and make sure it doesn't already have a git tag. +[[ -f ./VERSION ]] || die "Missing VERSION file; wrong directory?" +version=v$(<./VERSION) +[[ -n $(git tag -l "${version}") ]] && die "git tag for ${version} already exists!" + +# Next, craft a tag for the current HEAD. Push both the current commit and the tag. +git tag "${version}" +git push +git push origin "${version}" + +echo OK