Merge remote-tracking branch 'main/master' into HEAD

2025-04-28 21:34:31 +00:00 · 2021-02-08 14:17:51 +03:00 · 2021-02-08 14:17:51 +03:00 · e0dfc827b3
commit e0dfc827b3
parent d7fa0b2d4b d38c7daa7e
35 changed files with 1429 additions and 823 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -10,10 +10,29 @@ on:
    - cron:  '0 12 * * *'

 jobs:
-  test:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y clang-format-9
+
+      - name: lint
+        run: |
+          mkdir build && cd build
+          cmake ..
+          cmake --build . --target peparse_format
+          cd .. && git diff --exit-code
+
+  pe-parse:
    strategy:
      matrix:
        platform: ["ubuntu-latest", "macos-latest"]
+        build-type: ["Debug", "Release"]
+        build-shared: ["0", "1"]
        compiler:
        - { CC: "clang", CXX: "clang++" }
        - { CC: "gcc", CXX: "g++" }
@ -23,27 +42,93 @@ jobs:
    runs-on: ${{ matrix.platform }}
    steps:
    - uses: actions/checkout@v2
-    - name: Build C
+    - name: build
      env:
        CC: ${{ matrix.compiler.CC }}
        CXX: ${{ matrix.compiler.CXX }}
      run: |
        mkdir build
        cd build
-        cmake ..
-        make
-    - name: Build Python
+        cmake \
+          -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
+          -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} \
+          ..
+        cmake --build .
+    - name: test
+      run: |
+        ./build/dump-pe/dump-pe ./test/assets/example.exe
+
+  pepy:
+    strategy:
+      matrix:
+        platform: ["ubuntu-latest", "macos-latest"]
+        python:
+          - "3.6"
+          - "3.7"
+          - "3.8"
+    runs-on: ${{ matrix.platform }}
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python }}
+    - name: build
      run: |
-        cd python
-        python2 setup.py build
        python3 setup.py build
-  test-windows:
+    - name: sdist and install
+      run: |
+        python3 setup.py sdist
+        python3 -m pip install --user dist/*.tar.gz
+    - name: test
+      run: |
+        python3 test/test_pepy.py test/assets/example.exe
+
+  pe-parse-windows:
+    strategy:
+      matrix:
+        build-arch: ["x64", "Win32"]
+        build-type: ["Debug", "Release"]
+        build-shared: ["0", "1"]
    runs-on: windows-latest
    steps:
    - uses: actions/checkout@v2
-    - name: Build C
+    - name: build
      run: |
        mkdir build
        cd build
-        cmake -G "Visual Studio 16 2019" -A x64 ..
-        cmake --build .
+        cmake `
+          -G "Visual Studio 16 2019" `
+          -A ${{ matrix.build-arch }} `
+          -DBUILD_SHARED_LIBS=${{ matrix.build-shared }} `
+          ..
+        cmake --build . --config ${{ matrix.build-type }}
+    - name: install  # must pass --config: Visual Studio generators default to Debug otherwise
+      run: |
+        cd build
+        cmake --build . --config ${{ matrix.build-type }} --target install
+    - name: test
+      run: |
+        .\build\bin\dump-pe.exe .\test\assets\example.exe
+
+  pepy-windows:
+    strategy:
+      matrix:
+        python:
+          - "3.6"
+          - "3.7"
+          - "3.8"
+    runs-on: windows-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python }}
+    - name: build
+      run: |
+        python setup.py build
+    - name: install
+      run: |
+        python -m pip install --user .
+    - name: test
+      run: |
+        python test/test_pepy.py test/assets/example.exe
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -0,0 +1,34 @@
+on:
+  push:
+    tags:
+      - 'v*'
+
+name: release
+
+jobs:
+  pypi:
+    name: upload release to PyPI
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v1
+      with:
+        python-version: "3.8"  # quoted so YAML keeps the version a string (3.10 would become 3.1)
+    - name: create release
+      id: create_release
+      uses: actions/create-release@v1
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      with:
+        tag_name: ${{ github.ref }}
+        release_name: Release ${{ github.ref }}
+        draft: false
+        prerelease: ${{ contains(github.ref, 'pre') || contains(github.ref, 'rc') }}
+    - name: sdist
+      run: python3 setup.py sdist
+    - name: publish
+      uses: pypa/gh-action-pypi-publish@release/v1  # stable release ref instead of the moving master branch
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_TOKEN }}
+
--- a/.gitignore
+++ b/.gitignore
@ -1,8 +1,8 @@
-*Makefile*
+Makefile
 cmake_install.cmake
 dump-prog/dump-prog
 *.swp
-python/build
+build/
 .idea
 cmake-build-debug
 cmake-build-release
@ -11,8 +11,10 @@ CMakeSettings.json
 .vs
 .vscode
 examples_build
-
-
+.DS_Store
+dist/
+MANIFEST
+*.egg-info/
 *.stash

 *.o
--- a/.travis.yml
+++ b/.travis.yml
@ -1,63 +0,0 @@
-__build_stage_script: &__build_stage_script
-  stage: build
-  language: cpp
-  script:
-    - mkdir build && cd build
-    - cmake ..
-    - make
-    - cd ../python
-    - python2 setup.py build
-    - python3 setup.py build
-  addons:
-    apt:
-      packages:
-        - cmake
-        - python2.7
-        - python-dev
-        - build-essential
-        - realpath
-        - libicu-dev
-    homebrew:
-      packages:
-        - cmake
-    coverity_scan:
-      project:
-        name: "trailofbits/pe-parse"
-        description: "Principled, lightweight C/C++ PE parser"
-      notification_email: dan@trailofbits.com
-      build_command_prepend: mkdir cov_build && cd cov_build && cmake ..
-      build_command: make
-      branch_pattern: master
-
-jobs:
-  include:
-    - stage: lint
-      language: minimal
-      dist: bionic
-      addons:
-        apt:
-          packages:
-            - clang-format-8
-      script:
-        - find . \( -name '*.h' \) -or \( -name '*.cpp' \) | xargs clang-format -i -style=file
-        - git diff --exit-code
-    - <<: *__build_stage_script
-      os: linux
-      compiler: clang
-    - <<: *__build_stage_script
-      os: linux
-      compiler: gcc
-    - <<: *__build_stage_script
-      os: osx
-      compiler: clang
-    - stage: build
-      os: windows
-      language: cpp
-      script:
-        - mkdir build && cd build
-        - cmake -G "Visual Studio 15 2017 Win64" ..
-        - cmake --build .
-
-env:
-  global:
-  - secure: "O+BGqz4ugoVIJbQTh0dJjKRrsSVzkCYSe0WpRzEWK3l8Mw7hqX300g81TxRwTzN2zfUsROMzaeGaXWfGzYakgW59K1WIioaczxtv2MzzUQTbqzJPa+qQoP9bk/b2wJ5jcOL965/rudRju4UiIwuIgzDAMN3nAfIEJgV/2zANLIg="
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,11 +1,13 @@
-cmake_minimum_required(VERSION 3.7)
+cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 project(pe-parse)

+# NOTE(ww): CMake has bad defaults for install prefixes.
+# Instead of fussing over them, install everything to the build directory by default
+# and let the user set CMAKE_INSTALL_PREFIX explicitly for their own needs.
 if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
-  set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "Default install directory" FORCE)
+  set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}" CACHE PATH "Default install directory" FORCE)
 endif ()

-set(CMAKE_VERBOSE_MAKEFILE True)
 if (NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "RelWithDebInfo")
 endif ()
@ -13,19 +15,45 @@ endif ()
 include(cmake/compilation_flags.cmake)
 list(APPEND GLOBAL_CXXFLAGS ${DEFAULT_CXX_FLAGS})

-option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)
+option(BUILD_SHARED_LIBS "Build Shared Libraries" ON)
 option(BUILD_COMMAND_LINE_TOOLS "Build Command Line Tools" ON)
+option(PEPARSE_LIBRARY_WARNINGS "Log pe-parse library warnings to stderr" OFF)
+

 if (MSVC)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif ()

+file(READ "${PROJECT_SOURCE_DIR}/VERSION" PEPARSE_VERSION)
+string(STRIP "${PEPARSE_VERSION}" PEPARSE_VERSION)
+add_compile_definitions(PEPARSE_VERSION="${PEPARSE_VERSION}")
+
 add_subdirectory(pe-parser-library)

 if (BUILD_COMMAND_LINE_TOOLS)
  add_subdirectory(dump-pe)
 endif ()

+# `peparse_format` target.
+file(
+    GLOB_RECURSE
+    PEPARSE_ALL_SOURCES
+        pe-parser-library/*.cpp
+        pe-parser-library/*.h
+        pepy/*.cpp
+        pepy/*.h
+        dump-pe/*.cpp
+        examples/*.cpp
+        examples/*.h
+)
+add_custom_target(
+  peparse_format
+  COMMAND clang-format -i -style=file ${PEPARSE_ALL_SOURCES}
+  WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+  COMMENT "Auto-format the codebase with clang-format"
+  VERBATIM
+)
+
 message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 message(STATUS "Build Shared: ${BUILD_SHARED_LIBS} ${BUILD_SHARED_LIBS_MESSAGE}")
 message(STATUS "Build Command Line Tools: ${BUILD_COMMAND_LINE_TOOLS}")
--- a/Dockerfile
+++ b/Dockerfile
@ -7,15 +7,18 @@ LABEL creator "Trail of Bits"
 LABEL dockerfile_maintenance "William Woodruff <william@trailofbits>"
 LABEL desc "Principled, lightweight C/C++ PE parser"

-RUN apk add --no-cache cmake icu-dev build-base
+RUN apk add --no-cache cmake icu-dev clang build-base

 COPY . /app/pe-parse
 WORKDIR /app/pe-parse
+ENV CC=clang CXX=clang++
+# The project's default install prefix is now the build directory, so install
+# to /usr explicitly to keep the ENTRYPOINT path (/usr/bin/dump-pe) valid.
 RUN mkdir build && \
    cd build && \
-    cmake -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" .. && \
-    cmake --build . --config "${BUILD_TYPE}" && \
-    cmake --build . --config "${BUILD_TYPE}" --target install
+    cmake -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DCMAKE_INSTALL_PREFIX=/usr .. && \
+    cmake --build . && \
+    cmake --build . --target install

 ENTRYPOINT [ "/usr/bin/dump-pe" ]
 CMD ["--help"]
--- a/LICENSE.txt
+++ b/LICENSE.txt
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,3 @@
+include VERSION
+include pepy/README.md
+include pe-parser-library/include/pe-parse/*.h
--- a/README.md
+++ b/README.md
@ -1,10 +1,15 @@
 pe-parse
-=========================================
+========

 [![Build Status](https://img.shields.io/github/workflow/status/trailofbits/pe-parse/CI/master)](https://github.com/trailofbits/pe-parse/actions?query=workflow%3ACI)
-[![Coverity Scan Build Status](https://scan.coverity.com/projects/3671/badge.svg)](https://scan.coverity.com/projects/3671)
+[![LGTM Total alerts](https://img.shields.io/lgtm/alerts/g/trailofbits/pe-parse.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/trailofbits/pe-parse/alerts/)

-pe-parse is a principled, lightweight parser for windows portable executable files. It was created to assist in compiled program analysis, potentially of programs of unknown origins. This means that it should be resistant to malformed or maliciously crafted PE files, and it should support questions that analysis software would ask of an executable program container. For example, listing relocations, describing imports and exports, and supporting byte reads from virtual addresses as well as file offsets.
+pe-parse is a principled, lightweight parser for Windows portable executable files.
+It was created to assist in compiled program analysis, potentially of programs of unknown origins.
+This means that it should be resistant to malformed or maliciously crafted PE files, and it should
+support questions that analysis software would ask of an executable program container.
+For example, listing relocations, describing imports and exports, and supporting byte reads from
+virtual addresses as well as file offsets.

 pe-parse supports these use cases via a minimal API that provides methods for
 * Opening and closing a PE file
@ -16,21 +21,44 @@ pe-parse supports these use cases via a minimal API that provides methods for
 * Reading bytes from specified virtual addresses
 * Retrieving the program entry point

-The interface is defined in `parser-library/parse.h`. The program in `dump-prog/dump.cpp` is an example of using the parser-library API to dump information about a PE file.
+The interface is defined in `pe-parser-library/include/pe-parse/parse.h`.

-Internally, the parser-library uses a bounded buffer abstraction to access information stored in the PE file. This should help in constructing a sane parser that allows for detection of the use of bogus values in the PE that would result in out of bounds accesses of the input buffer. Once data is read from the file it is sanitized and placed in C++ STL containers of internal types.
+The program in `dump-pe/main.cpp` is an example of using the parser-library API to dump
+information about a PE file.
+
+Internally, the parser-library uses a bounded buffer abstraction to access information stored in
+the PE file. This should help in constructing a sane parser that allows for detection of the use
+of bogus values in the PE that would result in out of bounds accesses of the input buffer.
+Once data is read from the file it is sanitized and placed in C++ STL containers of internal types.
+
+## Installation
+
+pe-parse can be installed via [vcpkg](https://github.com/microsoft/vcpkg):
+
+```bash
+$ vcpkg install pe-parse
+```
+
+pe-parse includes Python bindings via `pepy`, which can be installed via `pip`:
+
+```bash
+$ pip3 install pepy
+```
+
+More information about `pepy` can be found in its [README](./pepy/README.md).
+
+## Dependencies

-Dependencies
-========
 ### CMake
  * Debian/Ubuntu: `sudo apt-get install cmake`
  * RedHat/Fedora: `sudo yum install cmake`
  * OSX: `brew install cmake`
  * Windows: Download the installer from the [CMake page](https://cmake.org/download/)

-Building
-========
+## Building
+
 ### Generic instructions
+
 ```
 git clone https://github.com/trailofbits/pe-parse.git
 cd pe-parse
@ -39,48 +67,40 @@ mkdir build
 cd build

 cmake -DCMAKE_BUILD_TYPE=Release ..
-cmake --build . --config Release
+cmake --build .

 # optional
-cmake --build . --config Release --target install
+cmake --build . --target install
 ```

-PE files that have a Resource section with strings for the Type are encoded in UTF-16, but that `std::string` expects UTF-8. Some cross-platform solution
-is desired. You can let cmake choose one it finds in your build environment or you can choose one from the following options yourself and specify it with
-the `-DUNICODE_LIBRARY` argument when generating the project files with cmake:
-* `icu` (preferred) - "[ICU](http://site.icu-project.org/) is a mature, widely used set of C/C++ and Java libraries providing Unicode and Globalization support for software applications"
-* `codecvt` - A C++ library header file ([now deprecated](http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0618r0.html)) supported by some C++ runtimes
+### Windows-specific

-### Notes about Windows
-
-If you are building on Windows with Visual Studio, the generator option can be used to select the compiler version and the output architecture:
+VS 2017 and VS 2019 are supported.

 ```
 # Compile 64-bit binaries with Visual Studio 2017
-cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release ..
+cmake -G "Visual Studio 15 2017 Win64" ..

-# Compile 32-bit binaries with Visual Studio 2017
-cmake -G "Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=Release ..
+# Or, with VS 2019, use the -A flag for architecture
+cmake -G "Visual Studio 16 2019" -A x64 ..
+
+# Pass the build type at build time
+cmake --build . --config Release
 ```

-Visual Studio 2015 or higher is required to use codecvt, but you also have the option of using [ICU](http://site.icu-project.org/). The easiest way to
-get started with ICU in Windows is with [vcpkg](https://vcpkg.readthedocs.io/): `vcpkg install icu`. Then add the
-`-DCMAKE_TOOLCHAIN_FILE=C:\src\vcpkg\scripts\buildsystems\vcpkg.cmake` argument when generating the project files with cmake to add the appropriate
-library and include directories to the project.
+## Using the library

-Using the library
-=======
 Once the library is installed, linking to it is easy! Add the following lines in your CMake project:

 ```
-find_package(peparse REQUIRED)
+find_package(pe-parse REQUIRED)

-target_link_libraries(your_target_name ${PEPARSE_LIBRARIES})
-target_include_directories(your_target_name PRIVATE ${PEPARSE_INCLUDE_DIRS})
+target_link_libraries(your_target_name PRIVATE pe-parse::pe-parse)
 ```

-You can see a full example in the examples/peaddrconv folder.
+You can see a full example in the [examples/peaddrconv](examples/peaddrconv) folder.

-Authors
-=======
-pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant contributions from [Wesley Shields](https://github.com/wxsBSD).
+## Authors
+
+pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com), with significant
+contributions from [Wesley Shields](https://github.com/wxsBSD).
--- a/VERSION
+++ b/VERSION
@ -0,0 +1 @@
+1.2.0
--- a/cmake/compilation_flags.cmake
+++ b/cmake/compilation_flags.cmake
@ -32,10 +32,8 @@ else ()
  endif ()

  if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-    message(STATUS "This is a debug build; enabling -Weverything...")
-
    list(APPEND DEFAULT_CXX_FLAGS
-      -Weverything -Wno-c++98-compat -Wno-missing-prototypes
+      -Wno-c++98-compat -Wno-missing-prototypes
      -Wno-missing-variable-declarations -Wno-global-constructors
      -Wno-exit-time-destructors -Wno-padded -Wno-error
    )
--- a/dump-pe/CMakeLists.txt
+++ b/dump-pe/CMakeLists.txt
@ -1,8 +1,8 @@
-cmake_minimum_required(VERSION 3.7)
+cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 project(dump-pe)

 add_executable(${PROJECT_NAME} main.cpp)
-target_link_libraries(${PROJECT_NAME} PRIVATE pe-parser-library)
+target_link_libraries(${PROJECT_NAME} PRIVATE pe-parse)
 target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})

-install(TARGETS ${PROJECT_NAME} DESTINATION "bin")
+install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION "bin")
--- a/dump-pe/main.cpp
+++ b/dump-pe/main.cpp
@ -27,11 +27,16 @@ THE SOFTWARE.
 #include <iostream>
 #include <sstream>

-#include <parser-library/parse.h>
+#include <pe-parse/parse.h>
+
+#include "vendor/argh.h"

 using namespace peparse;

-int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) {
+int printExps(void *N,
+              const VA &funcAddr,
+              const std::string &mod,
+              const std::string &func) {
  static_cast<void>(N);

  auto address = static_cast<std::uint32_t>(funcAddr);
@ -47,7 +52,7 @@ int printExps(void *N, VA funcAddr, std::string &mod, std::string &func) {
 }

 int printImports(void *N,
-                 VA impAddr,
+                 const VA &impAddr,
                 const std::string &modName,
                 const std::string &symName) {
  static_cast<void>(N);
@ -59,7 +64,7 @@ int printImports(void *N,
  return 0;
 }

-int printRelocs(void *N, VA relocAddr, reloc_type type) {
+int printRelocs(void *N, const VA &relocAddr, const reloc_type &type) {
  static_cast<void>(N);

  std::cout << "TYPE: ";
@ -99,12 +104,12 @@ int printRelocs(void *N, VA relocAddr, reloc_type type) {
 }

 int printSymbols(void *N,
-                 std::string &strName,
-                 uint32_t &value,
-                 int16_t &sectionNumber,
-                 uint16_t &type,
-                 uint8_t &storageClass,
-                 uint8_t &numberOfAuxSymbols) {
+                 const std::string &strName,
+                 const uint32_t &value,
+                 const int16_t &sectionNumber,
+                 const uint16_t &type,
+                 const uint8_t &storageClass,
+                 const uint8_t &numberOfAuxSymbols) {
  static_cast<void>(N);

  std::cout << "Symbol Name: " << strName << "\n";
@ -227,7 +232,7 @@ int printSymbols(void *N,
  return 0;
 }

-int printRich(void *N, rich_entry r) {
+int printRich(void *N, const rich_entry &r) {
  static_cast<void>(N);
  std::cout << std::dec;
  std::cout << std::setw(10) << "ProdId:" << std::setw(7) << r.ProductId;
@ -239,7 +244,7 @@ int printRich(void *N, rich_entry r) {
  return 0;
 }

-int printRsrc(void *N, resource r) {
+int printRsrc(void *N, const resource &r) {
  static_cast<void>(N);

  if (r.type_str.length())
@ -264,10 +269,10 @@ int printRsrc(void *N, resource r) {
 }

 int printSecs(void *N,
-              VA secBase,
-              std::string &secName,
-              image_section_header s,
-              bounded_buffer *data) {
+              const VA &secBase,
+              const std::string &secName,
+              const image_section_header &s,
+              const bounded_buffer *data) {
  static_cast<void>(N);
  static_cast<void>(s);

@ -292,14 +297,21 @@ int printSecs(void *N,
  std::cout << std::boolalpha << static_cast<bool>(p->peHeader.x) << "\n";

 int main(int argc, char *argv[]) {
-  if (argc != 2 || (argc == 2 && std::strcmp(argv[1], "--help") == 0)) {
+
+  argh::parser cmdl(argv);
+
+  if (cmdl[{"-h", "--help"}] || argc <= 1) {
    std::cout << "dump-pe utility from Trail of Bits\n";
    std::cout << "Repository: https://github.com/trailofbits/pe-parse\n\n";
    std::cout << "Usage:\n\tdump-pe /path/to/executable.exe\n";
-    return 1;
+    return 0;
+  } else if (cmdl[{"-v", "--version"}]) {
+    std::cout << "dump-pe (pe-parse) version " << PEPARSE_VERSION << "\n";
+    return 0;
  }

-  parsed_pe *p = ParsePEFromFile(argv[1]);
+  parsed_pe *p = ParsePEFromFile(cmdl[1].c_str());
+
  if (p == nullptr) {
    std::cout << "Error: " << GetPEErr() << " (" << GetPEErrString() << ")"
              << "\n";
--- a/dump-pe/vendor/argh.h
+++ b/dump-pe/vendor/argh.h
@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2016, Adi Shavit
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of  nor the names of its contributors may be used to
+ *    endorse or promote products derived from this software without specific
+ *    prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <sstream>
+#include <limits>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <cassert>
+
+namespace argh
+{
+   // Terminology:
+   // A command line is composed of 2 types of args:
+   // 1. Positional args, i.e. free standing values
+   // 2. Options: args beginning with '-'. We identify two kinds:
+   //    2.1: Flags: boolean options =>  (exist ? true : false)
+   //    2.2: Parameters: a name followed by a non-option value
+
+#if !defined(__GNUC__) || (__GNUC__ >= 5)
+   using string_stream = std::istringstream;
+#else
+    // Until GCC 5, istringstream did not have a move constructor.
+    // stringstream_proxy is used instead, as a workaround.
+   class stringstream_proxy
+   {
+   public:
+      stringstream_proxy() = default;
+
+      // Construct with a value.
+      stringstream_proxy(std::string const& value) :
+         stream_(value)
+      {}
+
+      // Copy constructor.
+      stringstream_proxy(const stringstream_proxy& other) :
+         stream_(other.stream_.str())
+      {
+         stream_.setstate(other.stream_.rdstate());
+      }
+
+      void setstate(std::ios_base::iostate state) { stream_.setstate(state); }
+
+      // Stream out the value of the parameter.
+      // If the conversion was not possible, the stream will enter the fail state,
+      // and operator bool will return false.
+      template<typename T>
+      stringstream_proxy& operator >> (T& thing)
+      {
+         stream_ >> thing;
+         return *this;
+      }
+
+
+      // Get the string value.
+      std::string str() const { return stream_.str(); }
+
+      std::stringbuf* rdbuf() const { return stream_.rdbuf(); }
+
+      // Check the state of the stream.
+      // False when the most recent stream operation failed
+      explicit operator bool() const { return !!stream_; }
+
+      ~stringstream_proxy() = default;
+   private:
+      std::istringstream stream_;
+   };
+   using string_stream = stringstream_proxy;
+#endif
+
+   class parser
+   {
+   public:
+      enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0,
+                  PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1,
+                  NO_SPLIT_ON_EQUALSIGN = 1 << 2,
+                  SINGLE_DASH_IS_MULTIFLAG = 1 << 3,
+                };
+
+      parser() = default;
+
+      parser(std::initializer_list<char const* const> pre_reg_names)
+      {  add_params(pre_reg_names); }
+
+      parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
+      {  parse(argv, mode); }
+
+      parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
+      {  parse(argc, argv, mode); }
+
+      void add_param(std::string const& name);
+      void add_params(std::initializer_list<char const* const> init_list);
+
+      void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
+      void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
+
+      std::multiset<std::string>          const& flags()    const { return flags_;    }
+      std::map<std::string, std::string>  const& params()   const { return params_;   }
+      std::vector<std::string>            const& pos_args() const { return pos_args_; }
+
+      // begin() and end() for using range-for over positional args.
+      std::vector<std::string>::const_iterator begin() const { return pos_args_.cbegin(); }
+      std::vector<std::string>::const_iterator end()   const { return pos_args_.cend();   }
+      size_t size()                                    const { return pos_args_.size();   }
+
+      //////////////////////////////////////////////////////////////////////////
+      // Accessors
+
+      // flag (boolean) accessors: return true if the flag appeared, otherwise false.
+      bool operator[](std::string const& name) const;
+
+      // multiple flag (boolean) accessors: return true if at least one of the flag appeared, otherwise false.
+      bool operator[](std::initializer_list<char const* const> init_list) const;
+
+      // returns positional arg string by order. Like argv[] but without the options
+      std::string const& operator[](size_t ind) const;
+
+      // returns a std::istream that can be used to convert a positional arg to a typed value.
+      string_stream operator()(size_t ind) const;
+
+      // same as above, but with a default value in case the arg is missing (index out of range).
+      template<typename T>
+      string_stream operator()(size_t ind, T&& def_val) const;
+
+      // parameter accessors, give a name get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      string_stream operator()(std::string const& name) const;
+
+      // accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      // returns the first value in the list to be found.
+      string_stream operator()(std::initializer_list<char const* const> init_list) const;
+
+      // same as above, but with a default value in case the param was missing.
+      // Non-string def_val types must have an operator<<() (output stream operator)
+      // If T only has an input stream operator, pass the string version of the type as in "3" instead of 3.
+      template<typename T>
+      string_stream operator()(std::string const& name, T&& def_val) const;
+
+      // same as above but for a list of names. returns the first value to be found.
+      template<typename T>
+      string_stream operator()(std::initializer_list<char const* const> init_list, T&& def_val) const;
+
+   private:
+      string_stream bad_stream() const;
+      std::string trim_leading_dashes(std::string const& name) const;
+      bool is_number(std::string const& arg) const;
+      bool is_option(std::string const& arg) const;
+      bool got_flag(std::string const& name) const;
+      bool is_param(std::string const& name) const;
+
+   private:
+      std::vector<std::string> args_;
+      std::map<std::string, std::string> params_;
+      std::vector<std::string> pos_args_;
+      std::multiset<std::string> flags_;
+      std::set<std::string> registeredParams_;
+      std::string empty_;
+   };
+
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(const char * const argv[], int mode)
+   {
+      int argc = 0;
+      for (auto argvp = argv; *argvp; ++argc, ++argvp);
+      parse(argc, argv, mode);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/)
+   {
+      // convert to strings
+      args_.resize(static_cast<decltype(args_)::size_type>(argc));
+      std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg;  });
+
+      // parse line
+      for (auto i = 0u; i < args_.size(); ++i)
+      {
+         if (!is_option(args_[i]))
+         {
+            pos_args_.emplace_back(args_[i]);
+            continue;
+         }
+
+         auto name = trim_leading_dashes(args_[i]);
+
+         if (!(mode & NO_SPLIT_ON_EQUALSIGN))
+         {
+            auto equalPos = name.find('=');
+            if (equalPos != std::string::npos)
+            {
+               params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) });
+               continue;
+            }
+         }
+
+         // if the option is unregistered and should be a multi-flag
+         if (1 == (args_[i].size() - name.size()) &&         // single dash
+            argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode
+            !is_param(name))                                  // unregistered
+         {
+            std::string keep_param;
+
+            if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param
+            {
+               keep_param += name.back();
+               name.resize(name.size() - 1);
+            }
+
+            for (auto const& c : name)
+            {
+               flags_.emplace(std::string{ c });
+            }
+
+            if (!keep_param.empty())
+            {
+               name = keep_param;
+            }
+            else
+            {
+               continue; // do not consider other options for this arg
+            }
+         }
+
+         // any potential option will get as its value the next arg, unless that arg is an option too
+         // in that case it will be determined a flag.
+         if (i == args_.size() - 1 || is_option(args_[i + 1]))
+         {
+            flags_.emplace(name);
+            continue;
+         }
+
+         // if 'name' is a pre-registered option, then the next arg cannot be a free parameter to it is skipped
+         // otherwise we have 2 modes:
+         // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag.
+         //                               The following value (the next arg) will be a free parameter.
+         //
+         // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg
+         //                                will be the value of that option.
+
+         assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION)
+             || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION));
+
+         bool preferParam = mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION;
+
+         if (is_param(name) || preferParam)
+         {
+            params_.insert({ name, args_[i + 1] });
+            ++i; // skip next value, it is not a free parameter
+            continue;
+         }
+         else
+         {
+            flags_.emplace(name);
+         }
+      };
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Builds a stream that is already in the failed state. The lookup
+   // operators return it when the requested value does not exist.
+   inline string_stream parser::bad_stream() const
+   {
+      string_stream failed;
+      failed.setstate(std::ios_base::failbit);
+      return failed;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Reports whether a double can be extracted from the start of 'arg'
+   // (which may begin with a '-'). Only the extraction itself is checked;
+   // characters left over after the number are not examined.
+   inline bool parser::is_number(std::string const& arg) const
+   {
+      // inefficient but simple: let a stream attempt the conversion
+      std::istringstream input(arg);
+      double parsed;
+      input >> parsed;
+      return !input.fail(); // fail() reports badbit as well as failbit
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Decides whether 'arg' is an option: it must start with '-' and must not
+   // be a number (so a value such as "-5" is not mistaken for an option).
+   // An empty argument is never an option.
+   inline bool parser::is_option(std::string const& arg) const
+   {
+      // An empty argument can legitimately arrive from the command line
+      // (e.g. `prog --name ""`). Answer "not an option" explicitly rather
+      // than asserting, so debug and release builds behave the same.
+      if (arg.empty())
+         return false;
+      if (is_number(arg))
+         return false;
+      return '-' == arg[0];
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Returns 'name' without its leading '-' characters. A string that is
+   // empty or consists solely of dashes is returned unchanged.
+   inline std::string parser::trim_leading_dashes(std::string const& name) const
+   {
+      const std::string::size_type first = name.find_first_not_of('-');
+      if (first == std::string::npos)
+         return name;
+      return name.substr(first);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // True when 'name' (leading dashes ignored) was recorded as a flag.
+   inline bool argh::parser::got_flag(std::string const& name) const
+   {
+      auto const key = trim_leading_dashes(name);
+      return flags_.find(key) != flags_.end();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // True when 'name' is one of the registered parameter names.
+   // The name is looked up exactly as given; no dashes are trimmed here.
+   inline bool argh::parser::is_param(std::string const& name) const
+   {
+      return 0 != registeredParams_.count(name);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Flag lookup by name; forwards to got_flag().
+   inline bool parser::operator[](std::string const& name) const
+   {
+      bool const present = got_flag(name);
+      return present;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // True when at least one of the listed names was recorded as a flag.
+   // An empty list yields false.
+   inline bool parser::operator[](std::initializer_list<char const* const> init_list) const
+   {
+      for (char const* const candidate : init_list)
+      {
+         if (got_flag(candidate))
+            return true; // stop at the first match
+      }
+      return false;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Positional argument at index 'ind'; when 'ind' is out of range a
+   // reference to the empty_ member is returned instead.
+   inline std::string const& parser::operator[](size_t ind) const
+   {
+      if (pos_args_.size() <= ind)
+         return empty_;
+      return pos_args_[ind];
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Stream over the value stored for parameter 'name' (leading dashes
+   // ignored); a failed stream when no such parameter was parsed.
+   inline string_stream parser::operator()(std::string const& name) const
+   {
+      auto const found = params_.find(trim_leading_dashes(name));
+      if (found == params_.end())
+         return bad_stream();
+      return string_stream(found->second);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Tries each name in list order and returns a stream over the first
+   // value found; a failed stream when none of the names has a value.
+   inline string_stream parser::operator()(std::initializer_list<char const* const> init_list) const
+   {
+      for (char const* const candidate : init_list)
+      {
+         auto const found = params_.find(trim_leading_dashes(candidate));
+         if (found == params_.end())
+            continue;
+         return string_stream(found->second);
+      }
+      return bad_stream();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Stream over the value stored for parameter 'name' (leading dashes
+   // ignored). When the parameter is absent, the stream holds 'def_val'
+   // written with the max_digits10 precision of long double.
+   template<typename T>
+   string_stream parser::operator()(std::string const& name, T&& def_val) const
+   {
+      auto const found = params_.find(trim_leading_dashes(name));
+      if (found == params_.end())
+      {
+         // nothing parsed for this name: serialise the caller's default
+         std::ostringstream fallback;
+         fallback.precision(std::numeric_limits<long double>::max_digits10);
+         fallback << def_val;
+         return string_stream(fallback.str());
+      }
+
+      return string_stream(found->second);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Tries each name in list order and returns a stream over the first
+   // value found. When none of the names has a value, the stream holds
+   // 'def_val' written with the max_digits10 precision of long double.
+   template<typename T>
+   string_stream parser::operator()(std::initializer_list<char const* const> init_list, T&& def_val) const
+   {
+      for (char const* const candidate : init_list)
+      {
+         auto const found = params_.find(trim_leading_dashes(candidate));
+         if (found == params_.end())
+            continue;
+         return string_stream(found->second);
+      }
+
+      // no name matched: serialise the caller's default
+      std::ostringstream fallback;
+      fallback.precision(std::numeric_limits<long double>::max_digits10);
+      fallback << def_val;
+      return string_stream(fallback.str());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Stream over the positional argument at index 'ind'; a failed stream
+   // when 'ind' is out of range.
+   inline string_stream parser::operator()(size_t ind) const
+   {
+      if (ind < pos_args_.size())
+         return string_stream(pos_args_[ind]);
+
+      return bad_stream();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Stream over the positional argument at index 'ind'. When 'ind' is out
+   // of range, the stream holds 'def_val' written with the max_digits10
+   // precision of long double.
+   template<typename T>
+   string_stream parser::operator()(size_t ind, T&& def_val) const
+   {
+      if (ind < pos_args_.size())
+         return string_stream(pos_args_[ind]);
+
+      // index out of range: serialise the caller's default
+      std::ostringstream fallback;
+      fallback.precision(std::numeric_limits<long double>::max_digits10);
+      fallback << def_val;
+      return string_stream(fallback.str());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Registers 'name' (leading dashes ignored) as a parameter name.
+   inline void parser::add_param(std::string const& name)
+   {
+      const std::string key = trim_leading_dashes(name);
+      registeredParams_.insert(key);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // Registers every name in the list as a parameter name
+   // (leading dashes ignored).
+   inline void parser::add_params(std::initializer_list<char const* const> init_list)
+   {
+      for (char const* const entry : init_list)
+      {
+         add_param(entry);
+      }
+   }
+}
--- a/examples/peaddrconv/CMakeLists.txt
+++ b/examples/peaddrconv/CMakeLists.txt
@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 project(peaddrconv)

 if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
@ -35,11 +35,10 @@ else ()
  endif ()
 endif ()

-find_package(peparse REQUIRED)
+find_package(pe-parse REQUIRED)

 add_executable(${PROJECT_NAME} main.cpp)
-target_link_libraries(${PROJECT_NAME} ${PEPARSE_LIBRARIES})
-target_include_directories(${PROJECT_NAME} PRIVATE ${PEPARSE_INCLUDE_DIR})
+target_link_libraries(${PROJECT_NAME} pe-parse::pe-parse)
 target_compile_options(${PROJECT_NAME} PRIVATE ${PEADDRCONV_CXXFLAGS})

 install(TARGETS ${PROJECT_NAME} DESTINATION "bin")
--- a/examples/peaddrconv/main.cpp
+++ b/examples/peaddrconv/main.cpp
@ -6,7 +6,7 @@
 #include <climits>
 #include <cstring>

-#include <parser-library/parse.h>
+#include <pe-parse/parse.h>

 using ParsedPeRef =
    std::unique_ptr<peparse::parsed_pe, void (*)(peparse::parsed_pe *)>;
@ -243,7 +243,9 @@ bool convertAddress(ParsedPeRef &pe,
                            result);
    }

-    default: { return false; }
+    default: {
+      return false;
+    }
  }
 }

--- a/pe-parser-library/CMakeLists.txt
+++ b/pe-parser-library/CMakeLists.txt
@ -1,81 +1,63 @@
-cmake_minimum_required(VERSION 3.7)
-project(pe-parser-library)
+cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
+project(pe-parse)

-set(UNICODE_LIBRARY "any" CACHE STRING "Select a unicode library")
-set_property(CACHE UNICODE_LIBRARY PROPERTY STRINGS "any" "icu" "codecvt")
-
-# This variable is used twice so setting once at the top here to prevent
-# the chance they get out of sync.
-# This is the minimum "required" version but there's a good chance early
-# versions of ICU support the simple functionality needed by this project.
-set(ICU_MINIMUM_REQUIRED 55.0)
+message(STATUS "VERSION file: ${PROJECT_SOURCE_DIR}/../VERSION")

 # List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...)
 list(APPEND PEPARSERLIB_SOURCEFILES
-  include/parser-library/parse.h
-  include/parser-library/nt-headers.h
-  include/parser-library/to_string.h
+  include/pe-parse/parse.h
+  include/pe-parse/nt-headers.h
+  include/pe-parse/to_string.h

  src/buffer.cpp
  src/parse.cpp
 )

-# Check for codecvt support. Likely the proper way to do this would be to
-# use CMake system inspection via methods like "try_compile" to determine
-# if the "#include <codecvt>" directive compiles successfully.
-if (MSVC)
-  if (MSVC_VERSION LESS 1900)
-    set(CODECVT_SUPPORTED OFF)
-  else ()
-    set(CODECVT_SUPPORTED ON)
-  endif ()
-elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
-    set(CODECVT_SUPPORTED OFF)
-  else ()
-    set(CODECVT_SUPPORTED ON)
-  endif ()
-else ()
-  find_path(CODECVT_INCLUDE_DIR NAMES "codecvt")
-  if (CODECVT_INCLUDE_DIR)
-    set(CODECVT_SUPPORTED OFF)
-  else ()
-    set(CODECVT_SUPPORTED ON)
-  endif ()
-endif ()
-
-if(${UNICODE_LIBRARY} MATCHES "icu")
-  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED)
-  add_definitions(-DUSE_ICU4C)
-  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
-elseif(${UNICODE_LIBRARY} MATCHES "codecvt")
-  if(NOT CODECVT_SUPPORTED)
-    message(SEND_ERROR "codecvt header not found")
-  endif()
-  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
+# NOTE(ww): On Windows we use the Win32 API's built-in UTF16 conversion
+# routines; on other platforms we use codecvt. codecvt is nominally deprecated
+# in C++17 and onwards, but will probably be available for quite some time.
+# Previous versions of pe-parse used ICU when available, but this caused
+# DLL hell on Windows and wasn't worth the additional dependency.
+if(MSVC)
+  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_winapi.cpp)
 else()
-  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc)
-  if(ICU_FOUND)
-    add_definitions(-DUSE_ICU4C)
-    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
-  elseif(CODECVT_SUPPORTED)
-    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
-  else()
-    message(SEND_ERROR "unable to find codecvt header or ICU library (hint: try installing libicu-dev)")
-  endif(ICU_FOUND)
+  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
 endif()

 add_library(${PROJECT_NAME} ${PEPARSERLIB_SOURCEFILES})
-target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
-if(ICU_FOUND)
-  target_link_libraries(${PROJECT_NAME} ICU::uc)
-endif()

-install(TARGETS ${PROJECT_NAME}
-  RUNTIME DESTINATION "bin"
-  LIBRARY DESTINATION "lib"
-  ARCHIVE DESTINATION "lib"
+# When PEPARSE_LIBRARY_WARNINGS is set to a true value, pass it to the
+# library sources as a compile definition.
+if(PEPARSE_LIBRARY_WARNINGS)
+  target_compile_definitions(${PROJECT_NAME} PRIVATE PEPARSE_LIBRARY_WARNINGS=1)
+endif()
+
+target_include_directories(
+  ${PROJECT_NAME}
+  PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:include>
 )
-install(FILES "cmake/peparse-config.cmake" DESTINATION "lib/cmake/peparse")
-install(DIRECTORY "include/parser-library" DESTINATION "include")
+target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
+
+install(
+  TARGETS ${PROJECT_NAME}
+  EXPORT pe-parse-config
+  RUNTIME
+    DESTINATION "bin"
+  LIBRARY
+    DESTINATION "lib"
+  ARCHIVE
+    DESTINATION "lib"
+)
+export(
+  TARGETS ${PROJECT_NAME}
+  NAMESPACE pe-parse::
+  FILE "${CMAKE_CURRENT_BINARY_DIR}/pe-parse-config.cmake"
+)
+install(
+  EXPORT
+  pe-parse-config
+  DESTINATION "lib/cmake/pe-parse"
+  NAMESPACE pe-parse::
+  EXPORT_LINK_INTERFACE_LIBRARIES
+)
+install(DIRECTORY "include/pe-parse" DESTINATION "include")
--- a/pe-parser-library/cmake/pe-parse-config.cmake
+++ b/pe-parser-library/cmake/pe-parse-config.cmake
@ -0,0 +1,5 @@
+# Locate the directory that contains the pe-parse public header.
+find_path(PEPARSE_INCLUDE_DIR "pe-parse/parse.h")
+# Locate the pe-parse library under either of its accepted names.
+find_library(PEPARSE_LIBRARIES NAMES "libpe-parse" "pe-parse")
+
+# Report the package as found only when both lookups above succeeded.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(pe-parse DEFAULT_MSG PEPARSE_INCLUDE_DIR PEPARSE_LIBRARIES)
--- a/pe-parser-library/cmake/peparse-config.cmake
+++ b/pe-parser-library/cmake/peparse-config.cmake
@ -1,9 +0,0 @@
-if(CMAKE_CROSSCOMPILING)
-  find_path(PEPARSE_INCLUDE_DIR "parser-library/parse.h")
-else()
-  find_path(PEPARSE_INCLUDE_DIR $<SHELL_PATH:"parser-library/parse.h">)
-endif()
-find_library(PEPARSE_LIBRARIES NAMES "libpe-parser-library" "pe-parser-library")
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(peparse DEFAULT_MSG PEPARSE_INCLUDE_DIR PEPARSE_LIBRARIES)
--- a/pe-parser-library/include/parser-library/nt-headers.h
+++ b/pe-parser-library/include/parser-library/nt-headers.h
@ -28,10 +28,6 @@ THE SOFTWARE.
 #include <string>
 #include <vector>

-#define _offset(t, f)         \
-  static_cast<std::uint32_t>( \
-      reinterpret_cast<std::ptrdiff_t>(&static_cast<t *>(nullptr)->f))
-
 // need to pack these structure definitions

 // some constant definitions
@ -210,6 +206,19 @@ constexpr std::uint8_t IMAGE_SYM_CLASS_FILE = 103;
 constexpr std::uint8_t IMAGE_SYM_CLASS_SECTION = 104;
 constexpr std::uint8_t IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105;
 constexpr std::uint8_t IMAGE_SYM_CLASS_CLR_TOKEN = 107;
+
+// Optional header DLL characteristics
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE = 0x0040;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY = 0x0080;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NX_COMPAT = 0x0100;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = 0x0200;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_SEH = 0x0400;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_NO_BIND = 0x0800;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_APPCONTAINER = 0x1000;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = 0x2000;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_GUARD_CF = 0x4000;
+constexpr std::uint16_t IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000;
 #endif
 // clang-format on

--- a/pe-parser-library/include/parser-library/parse.h
+++ b/pe-parser-library/include/parser-library/parse.h
@ -32,7 +32,7 @@ THE SOFTWARE.
 #include "to_string.h"

 #ifdef _MSC_VER
-#define __typeof__(x) std::remove_reference < decltype(x)> ::type
+#define __typeof__(x) std::remove_reference<decltype(x)>::type
 #endif

 #define PE_ERR(x)               \
@ -40,28 +40,28 @@ THE SOFTWARE.
  err_loc.assign(__func__);     \
  err_loc += ":" + to_string<std::uint32_t>(__LINE__, std::dec);

-#define READ_WORD(b, o, inst, member)                                     \
-  if (!readWord(b, o + _offset(__typeof__(inst), member), inst.member)) { \
-    PE_ERR(PEERR_READ);                                                   \
-    return false;                                                         \
-  }
-
-#define READ_DWORD(b, o, inst, member)                                     \
-  if (!readDword(b, o + _offset(__typeof__(inst), member), inst.member)) { \
+#define READ_WORD(b, o, inst, member)                                      \
+  if (!readWord(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
    PE_ERR(PEERR_READ);                                                    \
    return false;                                                          \
  }

-#define READ_QWORD(b, o, inst, member)                                     \
-  if (!readQword(b, o + _offset(__typeof__(inst), member), inst.member)) { \
-    PE_ERR(PEERR_READ);                                                    \
-    return false;                                                          \
+#define READ_DWORD(b, o, inst, member)                                      \
+  if (!readDword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
+    PE_ERR(PEERR_READ);                                                     \
+    return false;                                                           \
  }

-#define READ_BYTE(b, o, inst, member)                                     \
-  if (!readByte(b, o + _offset(__typeof__(inst), member), inst.member)) { \
-    PE_ERR(PEERR_READ);                                                   \
-    return false;                                                         \
+#define READ_QWORD(b, o, inst, member)                                      \
+  if (!readQword(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
+    PE_ERR(PEERR_READ);                                                     \
+    return false;                                                           \
+  }
+
+#define READ_BYTE(b, o, inst, member)                                      \
+  if (!readByte(b, o + offsetof(__typeof__(inst), member), inst.member)) { \
+    PE_ERR(PEERR_READ);                                                    \
+    return false;                                                          \
  }

 #define TEST_MACHINE_CHARACTERISTICS(h, m, ch) \
@ -149,6 +149,7 @@ bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out);
 bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out);

 bounded_buffer *readFileToFileBuffer(const char *filePath);
+bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz);
 bounded_buffer *
 splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to);
 void deleteBuffer(bounded_buffer *b);
@ -186,42 +187,54 @@ std::string GetPEErrLoc();
 // get a PE parse context from a file
 parsed_pe *ParsePEFromFile(const char *filePath);

+parsed_pe *ParsePEFromPointer(std::uint8_t *buffer, std::uint32_t sz);
+parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer);
+
 // destruct a PE context
 void DestructParsedPE(parsed_pe *p);

 // iterate over Rich header entries
-typedef int (*iterRich)(void *, rich_entry);
+typedef int (*iterRich)(void *, const rich_entry &);
 void IterRich(parsed_pe *pe, iterRich cb, void *cbd);

 // iterate over the resources
-typedef int (*iterRsrc)(void *, resource);
+typedef int (*iterRsrc)(void *, const resource &);
 void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);

 // iterate over the imports by RVA and string
-typedef int (*iterVAStr)(void *, VA, const std::string &, const std::string &);
+typedef int (*iterVAStr)(void *,
+                         const VA &,
+                         const std::string &,
+                         const std::string &);
 void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd);

 // iterate over relocations in the PE file
-typedef int (*iterReloc)(void *, VA, reloc_type);
+typedef int (*iterReloc)(void *, const VA &, const reloc_type &);
 void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd);

 // Iterate over symbols (symbol table) in the PE file
 typedef int (*iterSymbol)(void *,
-                          std::string &,
-                          std::uint32_t &,
-                          std::int16_t &,
-                          std::uint16_t &,
-                          std::uint8_t &,
-                          std::uint8_t &);
+                          const std::string &,
+                          const std::uint32_t &,
+                          const std::int16_t &,
+                          const std::uint16_t &,
+                          const std::uint8_t &,
+                          const std::uint8_t &);
 void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd);

 // iterate over the exports
-typedef int (*iterExp)(void *, VA, std::string &, std::string &);
+typedef int (*iterExp)(void *,
+                       const VA &,
+                       const std::string &,
+                       const std::string &);
 void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd);

 // iterate over sections
-typedef int (*iterSec)(
-    void *, VA secBase, std::string &, image_section_header, bounded_buffer *b);
+typedef int (*iterSec)(void *,
+                       const VA &,
+                       const std::string &,
+                       const image_section_header &,
+                       const bounded_buffer *);
 void IterSec(parsed_pe *pe, iterSec cb, void *cbd);

 // get byte at VA in PE
--- a/pe-parser-library/include/parser-library/to_string.h
+++ b/pe-parser-library/include/parser-library/to_string.h
@ -3,9 +3,8 @@
 #include <sstream>
 #include <string>

-#ifdef USE_ICU4C
-#include <unicode/unistr.h>
-typedef std::basic_string<UChar> UCharString;
+#if defined(_MSC_VER)
+typedef std::basic_string<wchar_t> UCharString;
 #else
 typedef std::u16string UCharString;
 #endif
--- a/pe-parser-library/src/buffer.cpp
+++ b/pe-parser-library/src/buffer.cpp
@ -26,7 +26,7 @@ THE SOFTWARE.
 #include <fstream>

 // keep this header above "windows.h" because it contains many types
-#include <parser-library/parse.h>
+#include <pe-parse/parse.h>

 #ifdef _WIN32

@ -298,6 +298,28 @@ bounded_buffer *readFileToFileBuffer(const char *filePath) {
  return p;
 }

+// Wraps caller-provided memory in a new bounded_buffer without copying the
+// bytes. Returns nullptr, after reporting PEERR_MEM through PE_ERR, when
+// 'data' is null or the bounded_buffer cannot be allocated.
+bounded_buffer *makeBufferFromPointer(std::uint8_t *data, std::uint32_t sz) {
+  if (data == nullptr) {
+    PE_ERR(PEERR_MEM);
+    return nullptr;
+  }
+
+  bounded_buffer *wrapper = new (std::nothrow) bounded_buffer();
+  if (wrapper == nullptr) {
+    PE_ERR(PEERR_MEM);
+    return nullptr;
+  }
+
+  // The buffer points straight at the caller's bytes.
+  wrapper->buf = data;
+  wrapper->bufLen = sz;
+  wrapper->swapBytes = false;
+  wrapper->detail = nullptr;
+  // NOTE(review): copy = true presumably marks the memory as not owned by
+  // this buffer - confirm against deleteBuffer.
+  wrapper->copy = true;
+
+  return wrapper;
+}
+
 // split buffer inclusively from from to to by offset
 bounded_buffer *
 splitBuffer(bounded_buffer *b, std::uint32_t from, std::uint32_t to) {
--- a/pe-parser-library/src/parse.cpp
+++ b/pe-parser-library/src/parse.cpp
@ -24,14 +24,15 @@ THE SOFTWARE.

 #include <algorithm>
 #include <array>
+#include <cassert>
 #include <cstring>
 #include <iostream>
 #include <stdexcept>
 #include <vector>

-#include <parser-library/nt-headers.h>
-#include <parser-library/parse.h>
-#include <parser-library/to_string.h>
+#include <pe-parse/nt-headers.h>
+#include <pe-parse/parse.h>
+#include <pe-parse/to_string.h>

 namespace peparse {

@ -597,7 +598,7 @@ bool getSecForVA(const std::vector<section> &secs, VA v, section &sec) {
 }

 void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
-  for (rich_entry r : pe->peHeader.rich.Entries) {
+  for (rich_entry &r : pe->peHeader.rich.Entries) {
    if (cb(cbd, r) != 0) {
      break;
    }
@ -607,13 +608,11 @@ void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
 void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
  parsed_pe_internal *pint = pe->internal;

-  for (resource r : pint->rsrcs) {
+  for (const resource &r : pint->rsrcs) {
    if (cb(cbd, r) != 0) {
      break;
    }
  }
-
-  return;
 }

 bool parse_resource_id(bounded_buffer *data,
@ -672,7 +671,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
      rde = new resource_dir_entry;
    }

-    if (!readDword(sectionData, o + _offset(__typeof__(*rde), ID), rde->ID)) {
+    if (!readDword(sectionData, o + offsetof(__typeof__(*rde), ID), rde->ID)) {
      PE_ERR(PEERR_READ);
      if (dirent == nullptr) {
        delete rde;
@ -680,7 +679,8 @@ bool parse_resource_table(bounded_buffer *sectionData,
      return false;
    }

-    if (!readDword(sectionData, o + _offset(__typeof__(*rde), RVA), rde->RVA)) {
+    if (!readDword(
+            sectionData, o + offsetof(__typeof__(*rde), RVA), rde->RVA)) {
      PE_ERR(PEERR_READ);
      if (dirent == nullptr) {
        delete rde;
@ -761,7 +761,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
       */

      if (!readDword(sectionData,
-                     rde->RVA + _offset(__typeof__(rdat), RVA),
+                     rde->RVA + offsetof(__typeof__(rdat), RVA),
                     rdat.RVA)) {
        PE_ERR(PEERR_READ);
        if (dirent == nullptr) {
@ -771,7 +771,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
      }

      if (!readDword(sectionData,
-                     rde->RVA + _offset(__typeof__(rdat), size),
+                     rde->RVA + offsetof(__typeof__(rdat), size),
                     rdat.size)) {
        PE_ERR(PEERR_READ);
        if (dirent == nullptr) {
@ -781,7 +781,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
      }

      if (!readDword(sectionData,
-                     rde->RVA + _offset(__typeof__(rdat), codepage),
+                     rde->RVA + offsetof(__typeof__(rdat), codepage),
                     rdat.codepage)) {
        PE_ERR(PEERR_READ);
        if (dirent == nullptr) {
@ -791,7 +791,7 @@ bool parse_resource_table(bounded_buffer *sectionData,
      }

      if (!readDword(sectionData,
-                     rde->RVA + _offset(__typeof__(rdat), reserved),
+                     rde->RVA + offsetof(__typeof__(rdat), reserved),
                     rdat.reserved)) {
        PE_ERR(PEERR_READ);
        if (dirent == nullptr) {
@ -937,9 +937,22 @@ bool getSections(bounded_buffer *b,
    std::uint32_t highOff = lowOff + curSec.SizeOfRawData;
    thisSec.sectionData = splitBuffer(fileBegin, lowOff, highOff);

+    // GH#109: we trusted [lowOff, highOff) to be a range that yields
+    // a valid bounded_buffer, despite these being user-controllable.
+    // splitBuffer correctly handles this, but we failed to check for
+    // the nullptr it returns as a sentinel.
+    if (thisSec.sectionData == nullptr) {
+      return false;
+    }
+
    secs.push_back(thisSec);
  }

+  std::sort(
+      secs.begin(), secs.end(), [](const section &lhs, const section &rhs) {
+        return lhs.sec.PointerToRawData < rhs.sec.PointerToRawData;
+      });
+
  return true;
 }

@ -982,15 +995,15 @@ bool readOptionalHeader(bounded_buffer *b, optional_header_32 &header) {

  for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) {
    std::uint32_t c = (i * sizeof(data_directory));
-    c += _offset(optional_header_32, DataDirectory[0]);
+    c += offsetof(optional_header_32, DataDirectory[0]);
    std::uint32_t o;

-    o = c + _offset(data_directory, VirtualAddress);
+    o = c + offsetof(data_directory, VirtualAddress);
    if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) {
      return false;
    }

-    o = c + _offset(data_directory, Size);
+    o = c + offsetof(data_directory, Size);
    if (!readDword(b, o, header.DataDirectory[i].Size)) {
      return false;
    }
@ -1037,15 +1050,15 @@ bool readOptionalHeader64(bounded_buffer *b, optional_header_64 &header) {

  for (std::uint32_t i = 0; i < header.NumberOfRvaAndSizes; i++) {
    std::uint32_t c = (i * sizeof(data_directory));
-    c += _offset(optional_header_64, DataDirectory[0]);
+    c += offsetof(optional_header_64, DataDirectory[0]);
    std::uint32_t o;

-    o = c + _offset(data_directory, VirtualAddress);
+    o = c + offsetof(data_directory, VirtualAddress);
    if (!readDword(b, o, header.DataDirectory[i].VirtualAddress)) {
      return false;
    }

-    o = c + _offset(data_directory, Size);
+    o = c + offsetof(data_directory, Size);
    if (!readDword(b, o, header.DataDirectory[i].Size)) {
      return false;
    }
@ -1080,7 +1093,7 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {

  header.Signature = pe_magic;
  bounded_buffer *fhb =
-      splitBuffer(b, _offset(nt_header_32, FileHeader), b->bufLen);
+      splitBuffer(b, offsetof(nt_header_32, FileHeader), b->bufLen);

  if (fhb == nullptr) {
    PE_ERR(PEERR_MEM);
@ -1119,7 +1132,7 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
   * buffer regardless.
   */
  bounded_buffer *ohb =
-      splitBuffer(b, _offset(nt_header_32, OptionalHeader), b->bufLen);
+      splitBuffer(b, offsetof(nt_header_32, OptionalHeader), b->bufLen);

  if (ohb == nullptr) {
    deleteBuffer(fhb);
@ -1166,7 +1179,17 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
 // zero extends its first argument to 32 bits and then performs a rotate left
 // operation equal to the second arguments value of the first argument’s bits
 static inline std::uint32_t rol(std::uint32_t val, std::uint32_t num) {
-  return ((val << num) & 0xffffffff) | (val >> (32 - num));
+  assert(num < 32);
+  // Disable MSVC warning for unary minus operator applied to unsigned type
+#if defined(_MSC_VER) || defined(_MSC_FULL_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+  // https://blog.regehr.org/archives/1063
+  return (val << num) | (val >> (-num & 31));
+#if defined(_MSC_VER) || defined(_MSC_FULL_VER)
+#pragma warning(pop)
+#endif
 }

 std::uint32_t calculateRichChecksum(const bounded_buffer *b, pe_header &p) {
@ -1182,7 +1205,7 @@ std::uint32_t calculateRichChecksum(const bounded_buffer *b, pe_header &p) {
    if (i >= 0x3C && i <= 0x3F) {
      continue;
    }
-    checksum += rol(b->buf[i], i);
+    checksum += rol(b->buf[i], i & 0x1F);
  }

  // Next, take summation of each Rich header entry by combining its ProductId
@ -1467,7 +1490,7 @@ bool getExports(parsed_pe *p) {
    // get the name of this module
    std::uint32_t nameRva;
    if (!readDword(s.sectionData,
-                   rvaofft + _offset(export_dir_table, NameRVA),
+                   rvaofft + offsetof(export_dir_table, NameRVA),
                   nameRva)) {
      return false;
    }
@ -1495,7 +1518,7 @@ bool getExports(parsed_pe *p) {
    // now, get all the named export symbols
    std::uint32_t numNames;
    if (!readDword(s.sectionData,
-                   rvaofft + _offset(export_dir_table, NumberOfNamePointers),
+                   rvaofft + offsetof(export_dir_table, NumberOfNamePointers),
                   numNames)) {
      return false;
    }
@ -1504,7 +1527,7 @@ bool getExports(parsed_pe *p) {
      // get the names section
      std::uint32_t namesRVA;
      if (!readDword(s.sectionData,
-                     rvaofft + _offset(export_dir_table, NamePointerRVA),
+                     rvaofft + offsetof(export_dir_table, NamePointerRVA),
                     namesRVA)) {
        return false;
      }
@ -1529,7 +1552,8 @@ bool getExports(parsed_pe *p) {
      // get the EAT section
      std::uint32_t eatRVA;
      if (!readDword(s.sectionData,
-                     rvaofft + _offset(export_dir_table, ExportAddressTableRVA),
+                     rvaofft +
+                         offsetof(export_dir_table, ExportAddressTableRVA),
                     eatRVA)) {
        return false;
      }
@ -1553,7 +1577,7 @@ bool getExports(parsed_pe *p) {
      // get the ordinal base
      std::uint32_t ordinalBase;
      if (!readDword(s.sectionData,
-                     rvaofft + _offset(export_dir_table, OrdinalBase),
+                     rvaofft + offsetof(export_dir_table, OrdinalBase),
                     ordinalBase)) {
        return false;
      }
@ -1561,7 +1585,7 @@ bool getExports(parsed_pe *p) {
      // get the ordinal table
      std::uint32_t ordinalTableRVA;
      if (!readDword(s.sectionData,
-                     rvaofft + _offset(export_dir_table, OrdinalTableRVA),
+                     rvaofft + offsetof(export_dir_table, OrdinalTableRVA),
                     ordinalTableRVA)) {
        return false;
      }
@ -1704,13 +1728,13 @@ bool getRelocations(parsed_pe *p) {
      std::uint32_t blockSize;

      if (!readDword(d.sectionData,
-                     rvaofft + _offset(reloc_block, PageRVA),
+                     rvaofft + offsetof(reloc_block, PageRVA),
                     pageRva)) {
        return false;
      }

      if (!readDword(d.sectionData,
-                     rvaofft + _offset(reloc_block, BlockSize),
+                     rvaofft + offsetof(reloc_block, BlockSize),
                     blockSize)) {
        return false;
      }
@ -2294,31 +2318,36 @@ bool getSymbolTable(parsed_pe *p) {
      }

    } else {
-//      std::ios::fmtflags originalStreamFlags(std::cerr.flags());
+#ifdef PEPARSE_LIBRARY_WARNINGS
+      std::ios::fmtflags originalStreamFlags(std::cerr.flags());

-//      auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass);
-//      if (storageClassName == nullptr) {
-//        std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex
-//                  << static_cast<std::uint32_t>(sym.storageClass)
-//                  << " at offset 0x" << std::hex << offset << "\n";
-//      } else {
-//        std::cerr << "Warning: Skipping auxiliary symbol of type "
-//                  << storageClassName << " at offset 0x" << std::hex << offset
-//                  << "\n";
-//      }
+      auto storageClassName = GetSymbolTableStorageClassName(sym.storageClass);
+      if (storageClassName == nullptr) {
+        std::cerr << "Warning: Skipping auxiliary symbol of type 0x" << std::hex
+                  << static_cast<std::uint32_t>(sym.storageClass)
+                  << " at offset 0x" << std::hex << offset << "\n";
+      } else {

-//      std::cerr.flags(originalStreamFlags);
+        std::cerr << "Warning: Skipping auxiliary symbol of type "
+                  << storageClassName << " at offset 0x" << std::hex << offset
+                  << "\n";
+      }
+
+      std::cerr.flags(originalStreamFlags);
+#endif
      offset = nextSymbolOffset;
    }

    if (offset != nextSymbolOffset) {
-//      std::ios::fmtflags originalStreamFlags(std::cerr.flags());
+#ifdef PEPARSE_LIBRARY_WARNINGS
+      std::ios::fmtflags originalStreamFlags(std::cerr.flags());

-//      std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex
-//                << offset << ", expected: 0x" << std::hex << nextSymbolOffset
-//                << ")\n";
+      std::cerr << "Warning: Invalid internal offset (current: 0x" << std::hex
+                << offset << ", expected: 0x" << std::hex << nextSymbolOffset
+                << ")\n";

-//      std::cerr.flags(originalStreamFlags);
+      std::cerr.flags(originalStreamFlags);
+#endif
      offset = nextSymbolOffset;
    }
  }
@ -2326,7 +2355,7 @@ bool getSymbolTable(parsed_pe *p) {
  return true;
 }

-parsed_pe *ParsePEFromFile(const char *filePath) {
+parsed_pe *ParsePEFromBuffer(bounded_buffer *buffer) {
  // First, create a new parsed_pe structure
  // We pass std::nothrow parameter to new so in case of failure it returns
  // nullptr instead of throwing exception std::bad_alloc.
@ -2338,13 +2367,7 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
  }

  // Make a new buffer object to hold just our file data
-  p->fileBuffer = readFileToFileBuffer(filePath);
-
-  if (p->fileBuffer == nullptr) {
-    delete p;
-    // err is set by readFileToFileBuffer
-    return nullptr;
-  }
+  p->fileBuffer = buffer;

  p->internal = new (std::nothrow) parsed_pe_internal();

@ -2420,6 +2443,28 @@ parsed_pe *ParsePEFromFile(const char *filePath) {
  return p;
 }

+// Parses the PE file at filePath. The file is loaded with
+// readFileToFileBuffer and the resulting buffer is handed to
+// ParsePEFromBuffer. Returns nullptr if the file could not be loaded;
+// otherwise returns whatever ParsePEFromBuffer returns.
+parsed_pe *ParsePEFromFile(const char *filePath) {
+  auto buffer = readFileToFileBuffer(filePath);
+
+  if (buffer == nullptr) {
+    // err is set by readFileToFileBuffer
+    return nullptr;
+  }
+
+  // NOTE(review): ParsePEFromBuffer stores this pointer in
+  // parsed_pe::fileBuffer, so it presumably takes ownership -- confirm the
+  // buffer is released on every failure path inside ParsePEFromBuffer.
+  return ParsePEFromBuffer(buffer);
+}
+
+// Parses a PE image from caller-supplied memory. ptr and sz are passed to
+// makeBufferFromPointer and the resulting buffer is handed to
+// ParsePEFromBuffer. Returns nullptr if the buffer could not be created;
+// otherwise returns whatever ParsePEFromBuffer returns.
+// NOTE(review): presumably sz is the length in bytes of the memory at ptr,
+// and the memory must stay valid while the parsed_pe is in use -- confirm
+// against makeBufferFromPointer (copy vs. reference).
+parsed_pe *ParsePEFromPointer(std::uint8_t *ptr, std::uint32_t sz) {
+  auto buffer = makeBufferFromPointer(ptr, sz);
+
+  if (buffer == nullptr) {
+    // err is set by makeBufferFromPointer
+    return nullptr;
+  }
+
+  return ParsePEFromBuffer(buffer);
+}
+
 void DestructParsedPE(parsed_pe *p) {
  if (p == nullptr) {
    return;
@ -2447,7 +2492,7 @@ void DestructParsedPE(parsed_pe *p) {
 void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) {
  std::vector<importent> &l = pe->internal->imports;

-  for (importent i : l) {
+  for (importent &i : l) {
    if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) {
      break;
    }
@ -2460,7 +2505,7 @@ void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd) {
 void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) {
  std::vector<reloc> &l = pe->internal->relocs;

-  for (reloc r : l) {
+  for (reloc &r : l) {
    if (cb(cbd, r.shiftedAddr, r.type) != 0) {
      break;
    }
@ -2473,7 +2518,7 @@ void IterRelocs(parsed_pe *pe, iterReloc cb, void *cbd) {
 void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) {
  std::vector<symbol> &l = pe->internal->symbols;

-  for (symbol s : l) {
+  for (symbol &s : l) {
    if (cb(cbd,
           s.strName,
           s.value,
@ -2492,7 +2537,7 @@ void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd) {
 void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) {
  std::vector<exportent> &l = pe->internal->exports;

-  for (exportent i : l) {
+  for (exportent &i : l) {
    if (cb(cbd, i.addr, i.moduleName, i.symbolName) != 0) {
      break;
    }
@ -2505,7 +2550,7 @@ void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd) {
 void IterSec(parsed_pe *pe, iterSec cb, void *cbd) {
  parsed_pe_internal *pint = pe->internal;

-  for (section s : pint->secs) {
+  for (section &s : pint->secs) {
    if (cb(cbd, s.sectionBase, s.sectionName, s.sec, s.sectionData) != 0) {
      break;
    }
@ -2647,20 +2692,38 @@ bool GetDataDirectoryEntry(parsed_pe *pe,
    return false;
  }

-  section sec;
-  if (!getSecForVA(pe->internal->secs, addr, sec)) {
-    PE_ERR(PEERR_SECTVA);
-    return false;
-  }
+  /* NOTE(ww): DIR_SECURITY is an annoying special case: its contents
+   * are never mapped into memory, so its "RVA" is actually a direct
+   * file offset.
+   * See:
+   * https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only
+   */
+  if (dirnum == DIR_SECURITY) {
+    auto *buf = splitBuffer(
+        pe->fileBuffer, dir.VirtualAddress, dir.VirtualAddress + dir.Size);
+    if (buf == nullptr) {
+      PE_ERR(PEERR_SIZE);
+      return false;
+    }

-  auto off = static_cast<std::uint32_t>(addr - sec.sectionBase);
-  if (off + dir.Size >= sec.sectionData->bufLen) {
-    PE_ERR(PEERR_SIZE);
-    return false;
-  }
+    raw_entry.assign(buf->buf, buf->buf + buf->bufLen);
+    deleteBuffer(buf);
+  } else {
+    section sec;
+    if (!getSecForVA(pe->internal->secs, addr, sec)) {
+      PE_ERR(PEERR_SECTVA);
+      return false;
+    }

-  raw_entry.assign(sec.sectionData->buf + off,
-                   sec.sectionData->buf + off + dir.Size);
+    auto off = static_cast<std::uint32_t>(addr - sec.sectionBase);
+    if (off + dir.Size >= sec.sectionData->bufLen) {
+      PE_ERR(PEERR_SIZE);
+      return false;
+    }
+
+    raw_entry.assign(sec.sectionData->buf + off,
+                     sec.sectionData->buf + off + dir.Size);
+  }

  return true;
 }
--- a/pe-parser-library/src/unicode_codecvt.cpp
+++ b/pe-parser-library/src/unicode_codecvt.cpp
@ -24,25 +24,13 @@ THE SOFTWARE.

 #include <codecvt>
 #include <locale>
-#include <parser-library/to_string.h>
+#include <pe-parse/to_string.h>

 namespace peparse {
 // See
 // https://stackoverflow.com/questions/38688417/utf-conversion-functions-in-c11
 std::string from_utf16(const UCharString &u) {
-#if defined(_MSC_VER)
-  // std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>convert;
-  // // Doesn't compile with Visual Studio. See
-  // https://stackoverflow.com/questions/32055357/visual-studio-c-2015-stdcodecvt-with-char16-t-or-char32-t
-  std::wstring_convert<std::codecvt_utf8<std::int16_t>, std::int16_t> convert;
-  auto p = reinterpret_cast<const std::int16_t *>(u.data());
-  return convert.to_bytes(p, p + u.size());
-#else
-  // -std=c++11 or -std=c++14
-  // Requires GCC 5 or higher
-  // Requires Clang ??? or higher (tested on Clang 3.8, 5.0, 6.0)
+  // Converts u to a UTF-8 encoded std::string via std::wstring_convert.
+  // NOTE(review): std::codecvt_utf8<char16_t> is specified as a UCS-2 <->
+  // UTF-8 facet; input containing surrogate pairs may need
+  // std::codecvt_utf8_utf16<char16_t> instead -- confirm intended behavior.
  std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert;
  return convert.to_bytes(u);
-#endif
 }
 } // namespace peparse
--- a/pe-parser-library/src/unicode_winapi.cpp
+++ b/pe-parser-library/src/unicode_winapi.cpp
@ -1,7 +1,7 @@
 /*
 The MIT License (MIT)

-Copyright (c) 2019 Trail of Bits, Inc.
+Copyright (c) 2020 Trail of Bits, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -22,14 +22,35 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

-#include <parser-library/to_string.h>
-#include <unicode/unistr.h>
+#include <Windows.h>
+#include <pe-parse/to_string.h>

 namespace peparse {
 std::string from_utf16(const UCharString &u) {
-  icu::UnicodeString utf16_string = icu::UnicodeString(u.data(), u.length());
+  // Converts the UTF-16 string u to UTF-8 using WideCharToMultiByte.
+  // Returns an empty string when the size query reports 0 (empty input or
+  // conversion failure).
  std::string result;
-  utf16_string.toUTF8String(result);
+
+  // First call: a null output buffer of length 0 makes the API report the
+  // number of bytes required for the converted text.
+  std::size_t size = WideCharToMultiByte(CP_UTF8,
+                                         0,
+                                         u.data(),
+                                         static_cast<int>(u.size()),
+                                         nullptr,
+                                         0,
+                                         nullptr,
+                                         nullptr);
+
+  if (size <= 0) {
+    return result;
+  }
+
+  // resize(), not reserve(): reserve() only grows capacity and leaves
+  // size() == 0, so bytes written through &result[0] would lie outside the
+  // string's contents and the function would return an empty string.
+  result.resize(size);
+  WideCharToMultiByte(CP_UTF8,
+                      0,
+                      u.data(),
+                      static_cast<int>(u.size()),
+                      &result[0],
+                      static_cast<int>(result.size()),
+                      nullptr,
+                      nullptr);
+
  return result;
 }
 } // namespace peparse
--- a/pepy/README.md
+++ b/pepy/README.md
@ -0,0 +1,204 @@
+pepy
+====
+pepy (pronounced p-pie) is a python binding to the pe-parse parser.
+
+pepy supports Python versions 3.6 and above.
+
+The easiest way to use pepy is to install it via pip:
+
+```bash
+$ pip3 install pepy
+```
+
+## Building
+
+If you can build pe-parse and have a working python environment (headers and
+libraries) you can build pepy.
+
+1. Build pepy:
+  * `python3 setup.py build`
+2. Install pepy:
+  * `python3 setup.py install`
+
+**Building on Windows:** Python 3.x is typically installed as _python.exe_,
+**NOT** _python3.exe_.
+
+## Using
+
+### Parsed object
+
+There are a number of objects involved in pepy. The main one is the **parsed**
+object. This object is returned by the *parse* method.
+
+```python
+import pepy
+p = pepy.parse("/path/to/exe")
+```
+
+The **parsed** object has a number of methods:
+
+* `get_entry_point`: Return the entry point address
+* `get_machine_as_str`: Return the machine as a human readable string
+* `get_subsystem_as_str`: Return the subsystem as a human readable string
+* `get_bytes`: Return the first N bytes at a given address
+* `get_sections`: Return a list of section objects
+* `get_imports`: Return a list of import objects
+* `get_exports`: Return a list of export objects
+* `get_relocations`: Return a list of relocation objects
+* `get_resources`: Return a list of resource objects
+
+The **parsed** object has a number of attributes:
+
+* `signature`
+* `machine`
+* `numberofsections`
+* `timedatestamp`
+* `numberofsymbols`
+* `characteristics`
+* `magic`
+* `majorlinkerver`
+* `minorlinkerver`
+* `codesize`
+* `initdatasize`
+* `uninitdatasize`
+* `entrypointaddr`
+* `baseofcode`
+* `baseofdata`
+* `imagebase`
+* `sectionalignement`
+* `filealignment`
+* `majorosver`
+* `minorosver`
+* `win32ver`
+* `imagesize`
+* `headersize`
+* `checksum`
+* `subsystem`
+* `dllcharacteristics`
+* `stackreservesize`
+* `stackcommitsize`
+* `heapreservesize`
+* `heapcommitsize`
+* `loaderflags`
+* `rvasandsize`
+
+Example:
+
+```python
+import time
+import pepy
+
+p = pepy.parse("/path/to/exe")
+print("Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp)))
+ep = p.get_entry_point()
+print("Entry point: 0x%x" % ep)
+```
+
+The `get_sections`, `get_imports`, `get_exports`, `get_relocations` and
+`get_resources` methods each return a list of objects. The type of object
+depends upon the method called. `get_sections` returns a list of `section`
+objects, `get_imports` returns a list of `import` objects, etc.
+
+### Section Object
+
+The `section` object has the following attributes:
+
+* `base`
+* `length`
+* `virtaddr`
+* `virtsize`
+* `numrelocs`
+* `numlinenums`
+* `characteristics`
+* `data`
+
+### Import Object
+
+The `import` object has the following attributes:
+
+* `sym`
+* `name`
+* `addr`
+
+### Export Object
+
+The `export` object has the following attributes:
+
+* `mod`
+* `func`
+* `addr`
+
+### Relocation Object
+
+The `relocation` object has the following attributes:
+
+* `type`
+* `addr`
+
+### Resource Object
+
+The `resource` object has the following attributes:
+
+* `type_str`
+* `name_str`
+* `lang_str`
+* `type`
+* `name`
+* `lang`
+* `codepage`
+* `RVA`
+* `size`
+* `data`
+
+The `resource` object has the following methods:
+
+* `type_as_str`
+
+Resources are stored in a directory structure. The first three levels of the
+directory are called `type`, `name` and `lang`. Each of these levels can have
+either a pre-defined value or a custom string. The pre-defined values are
+stored in the `type`, `name` and `lang` attributes. If a custom string is
+found it will be stored in the `type_str`, `name_str` and `lang_str`
+attributes. The `type_as_str` method can be used to convert a pre-defined
+type value to a string representation.
+
+The following code shows how to iterate through resources:
+
+```python
+import pepy
+
+from hashlib import md5
+import sys
+
+p = pepy.parse(sys.argv[1])
+resources = p.get_resources()
+print("Resources: (%i)" % len(resources))
+for resource in resources:
+    print("[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest()))
+    if resource.type_str:
+        print("\tType string: %s" % resource.type_str)
+    else:
+        print("\tType: %s (%s)" % (hex(resource.type), resource.type_as_str()))
+    if resource.name_str:
+        print("\tName string: %s" % resource.name_str)
+    else:
+        print("\tName: %s" % hex(resource.name))
+    if resource.lang_str:
+        print("\tLang string: %s" % resource.lang_str)
+    else:
+        print("\tLang: %s" % hex(resource.lang))
+    print("\tCodepage: %s" % hex(resource.codepage))
+    print("\tRVA: %s" % hex(resource.RVA))
+    print("\tSize: %s" % hex(resource.size))
+```
+
+Note that some binaries (particularly packed) may have corrupt resource entries.
+In these cases you may find that `len(resource.data)` is 0 but `resource.size` is
+greater than 0. The `size` attribute is the size of the data as declared by the
+resource data entry.
+
+## Authors
+
+pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com).
+
+pepy was written by Wesley Shields (wxs@atarininja.org).
--- a/python/pepy.cpp
+++ b/python/pepy.cpp
@ -26,31 +26,14 @@
 */

 #include <Python.h>
-#include <parser-library/parse.h>
+#include <pe-parse/parse.h>
 #include <structmember.h>

 using namespace peparse;

-#define PEPY_VERSION "0.3"
-
-/*
- * Add some definition for compatibility between python2 and python3
+/* NOTE(ww): These don't necessarily have to be the same, but currently are.
 */
-#if PY_MAJOR_VERSION >= 3
-#define PyString_FromString PyUnicode_FromString
-#endif
-
-/*
- * Some macro only available after python 2.6
- * Needed for compatibility with python3
- */
-#ifndef PyVarObject_HEAD_INIT
-#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
-#endif
-
-#ifndef Py_TYPE
-#define Py_TYPE(_ob_) (((PyObject *) (_ob_))->ob_type)
-#endif
+#define PEPY_VERSION PEPARSE_VERSION

 /* These are used to across multiple objects. */
 #define PEPY_OBJECT_GET(OBJ, ATTR)                                          \
@ -154,9 +137,9 @@ static void pepy_import_dealloc(pepy_import *self) {
  Py_TYPE(self)->tp_free((PyObject *) self);
 }

-PEPY_OBJECT_GET(import, name)
-PEPY_OBJECT_GET(import, sym)
-PEPY_OBJECT_GET(import, addr)
+PEPY_OBJECT_GET(import, name);
+PEPY_OBJECT_GET(import, sym);
+PEPY_OBJECT_GET(import, addr);

 static PyGetSetDef pepy_import_getseters[] = {
    OBJECTGETTER(import, name, "Name"),
@ -228,9 +211,9 @@ static void pepy_export_dealloc(pepy_export *self) {
  Py_TYPE(self)->tp_free((PyObject *) self);
 }

-PEPY_OBJECT_GET(export, mod)
-PEPY_OBJECT_GET(export, func)
-PEPY_OBJECT_GET(export, addr)
+PEPY_OBJECT_GET(export, mod);
+PEPY_OBJECT_GET(export, func);
+PEPY_OBJECT_GET(export, addr);

 static PyGetSetDef pepy_export_getseters[] = {
    OBJECTGETTER(export, mod, "Module"),
@ -302,8 +285,8 @@ static void pepy_relocation_dealloc(pepy_relocation *self) {
  Py_TYPE(self)->tp_free((PyObject *) self);
 }

-PEPY_OBJECT_GET(relocation, type)
-PEPY_OBJECT_GET(relocation, addr)
+PEPY_OBJECT_GET(relocation, type);
+PEPY_OBJECT_GET(relocation, addr);

 static PyGetSetDef pepy_relocation_getseters[] = {
    OBJECTGETTER(relocation, type, "Type"),
@ -390,15 +373,15 @@ static void pepy_section_dealloc(pepy_section *self) {
  Py_TYPE(self)->tp_free((PyObject *) self);
 }

-PEPY_OBJECT_GET(section, name)
-PEPY_OBJECT_GET(section, base)
-PEPY_OBJECT_GET(section, length)
-PEPY_OBJECT_GET(section, virtaddr)
-PEPY_OBJECT_GET(section, virtsize)
-PEPY_OBJECT_GET(section, numrelocs)
-PEPY_OBJECT_GET(section, numlinenums)
-PEPY_OBJECT_GET(section, characteristics)
-PEPY_OBJECT_GET(section, data)
+PEPY_OBJECT_GET(section, name);
+PEPY_OBJECT_GET(section, base);
+PEPY_OBJECT_GET(section, length);
+PEPY_OBJECT_GET(section, virtaddr);
+PEPY_OBJECT_GET(section, virtsize);
+PEPY_OBJECT_GET(section, numrelocs);
+PEPY_OBJECT_GET(section, numlinenums);
+PEPY_OBJECT_GET(section, characteristics);
+PEPY_OBJECT_GET(section, data);

 static PyGetSetDef pepy_section_getseters[] = {
    OBJECTGETTER(section, name, "Name"),
@ -495,16 +478,16 @@ static void pepy_resource_dealloc(pepy_resource *self) {
  Py_TYPE(self)->tp_free((PyObject *) self);
 }

-PEPY_OBJECT_GET(resource, type_str)
-PEPY_OBJECT_GET(resource, name_str)
-PEPY_OBJECT_GET(resource, lang_str)
-PEPY_OBJECT_GET(resource, type)
-PEPY_OBJECT_GET(resource, name)
-PEPY_OBJECT_GET(resource, lang)
-PEPY_OBJECT_GET(resource, codepage)
-PEPY_OBJECT_GET(resource, RVA)
-PEPY_OBJECT_GET(resource, size)
-PEPY_OBJECT_GET(resource, data)
+PEPY_OBJECT_GET(resource, type_str);
+PEPY_OBJECT_GET(resource, name_str);
+PEPY_OBJECT_GET(resource, lang_str);
+PEPY_OBJECT_GET(resource, type);
+PEPY_OBJECT_GET(resource, name);
+PEPY_OBJECT_GET(resource, lang);
+PEPY_OBJECT_GET(resource, codepage);
+PEPY_OBJECT_GET(resource, RVA);
+PEPY_OBJECT_GET(resource, size);
+PEPY_OBJECT_GET(resource, data);

 static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
  PyObject *ret;
@ -587,7 +570,7 @@ static PyObject *pepy_resource_type_as_str(PyObject *self, PyObject *args) {
      break;
  }

-  ret = PyString_FromString(str);
+  ret = PyUnicode_FromString(str);
  if (!ret) {
    PyErr_SetString(pepy_error, "Unable to create return string.");
    return NULL;
@ -713,7 +696,7 @@ static PyObject *pepy_parsed_get_machine_as_str(PyObject *self,
  if (!str)
    Py_RETURN_NONE;

-  ret = PyString_FromString(str);
+  ret = PyUnicode_FromString(str);
  if (!ret) {
    PyErr_SetString(pepy_error, "Unable to create return string.");
    return NULL;
@ -731,7 +714,7 @@ static PyObject *pepy_parsed_get_subsystem_as_str(PyObject *self,
  if (!str)
    Py_RETURN_NONE;

-  ret = PyString_FromString(str);
+  ret = PyUnicode_FromString(str);
  if (!ret) {
    PyErr_SetString(pepy_error, "Unable to create return string.");
    return NULL;
@ -804,10 +787,10 @@ static PyObject *pepy_data_converter(bounded_buffer *data) {
 }

 int section_callback(void *cbd,
-                     VA base,
-                     std::string &name,
-                     image_section_header s,
-                     bounded_buffer *data) {
+                     const VA &base,
+                     const std::string &name,
+                     const image_section_header &s,
+                     const bounded_buffer *data) {
  uint32_t buflen;
  PyObject *sect;
  PyObject *tuple;
@ -878,7 +861,7 @@ static PyObject *pepy_parsed_get_sections(PyObject *self, PyObject *args) {
  return ret;
 }

-int resource_callback(void *cbd, resource r) {
+int resource_callback(void *cbd, const resource &r) {
  PyObject *rsrc;
  PyObject *tuple;
  PyObject *list = (PyObject *) cbd;
@ -938,7 +921,7 @@ static PyObject *pepy_parsed_get_resources(PyObject *self, PyObject *args) {
 }

 int import_callback(void *cbd,
-                    VA addr,
+                    const VA &addr,
                    const std::string &name,
                    const std::string &sym) {
  PyObject *imp;
@ -985,7 +968,10 @@ static PyObject *pepy_parsed_get_imports(PyObject *self, PyObject *args) {
  return ret;
 }

-int export_callback(void *cbd, VA addr, std::string &mod, std::string &func) {
+int export_callback(void *cbd,
+                    const VA &addr,
+                    const std::string &mod,
+                    const std::string &func) {
  PyObject *exp;
  PyObject *tuple;
  PyObject *list = (PyObject *) cbd;
@ -1034,7 +1020,7 @@ static PyObject *pepy_parsed_get_exports(PyObject *self, PyObject *args) {
  return ret;
 }

-int reloc_callback(void *cbd, VA addr, reloc_type type) {
+int reloc_callback(void *cbd, const VA &addr, const reloc_type &type) {
  PyObject *reloc;
  PyObject *tuple;
  PyObject *list = (PyObject *) cbd;
@ -1088,13 +1074,13 @@ static PyObject *pepy_parsed_get_relocations(PyObject *self, PyObject *args) {
    return ret;                                                            \
  }

-PEPY_PARSED_GET(signature, Signature)
-PEPY_PARSED_GET(machine, FileHeader.Machine)
-PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections)
-PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp)
-PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols)
-PEPY_PARSED_GET(characteristics, FileHeader.Characteristics)
-PEPY_PARSED_GET(magic, OptionalMagic)
+PEPY_PARSED_GET(signature, Signature);
+PEPY_PARSED_GET(machine, FileHeader.Machine);
+PEPY_PARSED_GET(numberofsections, FileHeader.NumberOfSections);
+PEPY_PARSED_GET(timedatestamp, FileHeader.TimeDateStamp);
+PEPY_PARSED_GET(numberofsymbols, FileHeader.NumberOfSymbols);
+PEPY_PARSED_GET(characteristics, FileHeader.Characteristics);
+PEPY_PARSED_GET(magic, OptionalMagic);

 /*
 * This is used to get things from the optional header, which can be either
@ -1125,8 +1111,8 @@ PEPY_PARSED_GET(magic, OptionalMagic)
    return ret;                                                            \
  }

-PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion)
-PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion)
+PEPY_PARSED_GET_OPTIONAL(majorlinkerver, MajorLinkerVersion);
+PEPY_PARSED_GET_OPTIONAL(minorlinkerver, MinorLinkerVersion);
 PEPY_PARSED_GET_OPTIONAL(codesize, SizeOfCode);
 PEPY_PARSED_GET_OPTIONAL(initdatasize, SizeOfInitializedData);
 PEPY_PARSED_GET_OPTIONAL(uninitdatasize, SizeOfUninitializedData);
@ -1328,7 +1314,7 @@ static PyObject *pepy_parse(PyObject *self, PyObject *args) {
 static PyMethodDef pepy_methods[] = {
    {"parse", pepy_parse, METH_VARARGS, "Parse PE from file."}, {NULL}};

-static PyObject *pepi_module_init(void) {
+PyMODINIT_FUNC PyInit_pepy(void) {
  PyObject *m;

  if (PyType_Ready(&pepy_parsed_type) < 0 ||
@ -1339,7 +1325,6 @@ static PyObject *pepi_module_init(void) {
      PyType_Ready(&pepy_resource_type) < 0)
    return NULL;

-#if PY_MAJOR_VERSION >= 3
  static struct PyModuleDef moduledef = {
      PyModuleDef_HEAD_INIT,
      "pepy",
@ -1351,13 +1336,8 @@ static PyObject *pepi_module_init(void) {
      NULL,
      NULL,
  };
-#endif

-#if PY_MAJOR_VERSION >= 3
  m = PyModule_Create(&moduledef);
-#else
-  m = Py_InitModule3("pepy", pepy_methods, "Python interface to pe-parse.");
-#endif
  if (!m)
    return NULL;

@ -1384,6 +1364,8 @@ static PyObject *pepi_module_init(void) {
  PyModule_AddObject(m, "pepy_resource", (PyObject *) &pepy_resource_type);

  PyModule_AddStringMacro(m, PEPY_VERSION);
+  PyModule_AddStringMacro(m, PEPARSE_VERSION);
+  PyModule_AddStringConstant(m, "__version__", PEPY_VERSION);

  PyModule_AddIntMacro(m, MZ_MAGIC);
  PyModule_AddIntMacro(m, NT_MAGIC);
@ -1447,13 +1429,3 @@ static PyObject *pepi_module_init(void) {

  return m;
 }
-
-#if PY_MAJOR_VERSION >= 3
-PyMODINIT_FUNC PyInit_pepy(void) {
-  return pepi_module_init();
-}
-#else
-PyMODINIT_FUNC initpepy(void) {
-  pepi_module_init();
-}
-#endif
--- a/python/README.md
+++ b/python/README.md
@ -1,217 +0,0 @@
-pepy
-====
-pepy (pronounced p-pie) is a python binding to the pe-parse parser.
-
-Building
-========
-If you can build pe-parse and have a working python environment (headers and
-libraries) you can build pepy.
-
-Python 2.7
----------
-1. Build pepy:
-  * python setup.py build
-2. Install pepy:
-  * python setup.py install
-
-**Building on Windows:** If you get a build error of 'Unable to find
-vcvarsall.bat', you must set the `VS90COMNTOOLS` environment variable prior
-to the appropriate path as per
-[this SO article](http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat):
-> While running setup.py for package installations, Python 2.7 searches for an
-> installed Visual Studio 2008. You can trick Python to use a newer Visual
-> Studio by setting the correct path in VS90COMNTOOLS environment variable
-> before calling setup.py.
-> 
-> Execute the following command based on the version of Visual Studio installed:
-> * Visual Studio 2010 (VS10): `SET VS90COMNTOOLS=%VS100COMNTOOLS%`
-> * Visual Studio 2012 (VS11): `SET VS90COMNTOOLS=%VS110COMNTOOLS%`
-> * Visual Studio 2013 (VS12): `SET VS90COMNTOOLS=%VS120COMNTOOLS%`
-> * Visual Studio 2015/2017 (VS14): `SET VS90COMNTOOLS=%VS140COMNTOOLS%`
-
-Python 3.x
----------
-1. Build pepy:
-  * python3 setup.py build
-2. Install pepy:
-  * python3 setup.py install
-
-**Building on Windows:** Python 3.x is typically installed as _python.exe_
-**NOT** _python3.exe_.
-
-Using
-=====
-Parsed object
-------------
-There are a number of objects involved in pepy. The main one is the **parsed**
-object. This object is returned by the *parse* method.
-
-```
-import pepy
-p = pepy.parse("/path/to/exe")
-```
-
-The **parsed** object has a number of methods:
-
-* get_entry_point: Return the entry point address
-* get_machine_as_str: Return the machine as a human readable string
-* get_subsystem_as_str: Return the subsystem as a human readable string
-* get_bytes: Return the first N bytes at a given address
-* get_sections: Return a list of section objects
-* get_imports: Return a list of import objects
-* get_exports: Return a list of export objects
-* get_relocations: Return a list of relocation objects
-* get_resources: Return a list of resource objects
-
-The **parsed** object has a number of attributes:
-
-* signature
-* machine
-* numberofsections
-* timedatestamp
-* numberofsymbols
-* characteristics
-* magic
-* majorlinkerver
-* minorlinkerver
-* codesize
-* initdatasize
-* uninitdatasize
-* entrypointaddr
-* baseofcode
-* baseofdata
-* imagebase
-* sectionalignement
-* filealignment
-* majorosver
-* minorosver
-* win32ver
-* imagesize
-* headersize
-* checksum
-* subsystem
-* dllcharacteristics
-* stackreservesize
-* stackcommitsize
-* heapreservesize
-* heapcommitsize
-* loaderflags
-* rvasandsize
-
-Example:
-```
-import time
-import pepy
-
-p = pepy.parse("/path/to/exe")
-print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))
-ep = p.get_entry_point()
-print "Entry point: 0x%x" % ep
-```
-
-The *get_sections*, *get_imports*, *get_exports*, *get_relocations* and
-*get_resources* methods each return a list of objects. The type of object
-depends upon the method called. *get_sections* returns a list of **section**
-objects, *get_imports* returns a list of **import** objects, etc.
-
-Section Object
--------------
-The **section** object has the following attributes:
-
-* base
-* length
-* virtaddr
-* virtsize
-* numrelocs
-* numlinenums
-* characteristics
-* data
-
-Import Object
-------------
-The **import** object has the following attributes:
-
-* sym
-* name
-* addr
-
-Export Object
-------------
-The **export** object has the following attributes:
-
-* mod
-* func
-* addr
-
-Relocation Object
-----------------
-The **relocation** object has the following attributes:
-
-* type
-* addr
-
-Resource Object
---------------
-The **resource** object has the following attributes:
-
-* type_str
-* name_str
-* lang_str
-* type
-* name
-* lang
-* codepage
-* RVA
-* size
-* data
-
-The **resource** object has the following methods:
-
-* type_as_str
-
-Resources are stored in a directory structure. The first three levels of the
-are called **type**, **name** and **lang**. Each of these levels can have
-either a pre-defined value or a custom string. The pre-defined values are
-stored in the *type*, *name* and *lang* attributes. If a custom string is
-found it will be stored in the *type_str*, *name_str* and *lang_str*
-attributes. The *type_as_str* method can be used to convert a pre-defined
-type value to a string representation.
-
-The following code shows how to iterate through resources:
-
-```
-import pepy
-
-from hashlib import md5
-
-p = pepy.parse(sys.argv[1])
-resources = p.get_resources()
-print "Resources: (%i)" % len(resources)
-for resource in resources:
-    print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
-    if resource.type_str:
-        print "\tType string: %s" % resource.type_str
-    else:
-        print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
-    if resource.name_str:
-        print "\tName string: %s" % resource.name_str
-    else:
-        print "\tName: %s" % hex(resource.name)
-    if resource.lang_str:
-        print "\tLang string: %s" % resource.lang_str
-    else:
-        print "\tLang: %s" % hex(resource.lang)
-    print "\tCodepage: %s" % hex(resource.codepage)
-    print "\tRVA: %s" % hex(resource.RVA)
-    print "\tSize: %s" % hex(resource.size)
-```
-
-Note that some binaries (particularly packed) may have corrupt resource entries.
-In these cases you may find that len(resource.data) is 0 but resource.size is
-greater than 0. The *size* attribute is the size of the data as declared by the
-resource data entry.
-
-Authors
-=======
-pe-parse was designed and implemented by Andrew Ruef (andrew@trailofbits.com)
-pepy was written by Wesley Shields (wxs@atarininja.org)
--- a/python/setup.py
+++ b/python/setup.py
@ -1,68 +0,0 @@
-# Copyright (c) 2013, Wesley Shields <wxs@atarininja.org>. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-
-from distutils.core import setup, Extension
-import os
-import sys
-import platform
-
-here = os.path.abspath(os.path.dirname(__file__))
-
-SOURCE_FILES = [os.path.join(here, 'pepy.cpp'),
-                os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'parse.cpp')),
-                os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'src', 'buffer.cpp'))]
-
-if platform.system() == 'Windows':
-  INCLUDE_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'include')),
-                  os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include')),
-                  'C:\\usr\\include']
-  LIBRARY_DIRS = [os.path.abspath(os.path.join(os.path.dirname(sys.executable), 'libs')),
-                  'C:\\usr\\lib']
-  COMPILE_ARGS = ["/EHsc"]
-else:
-  INCLUDE_DIRS = ['/usr/local/include',
-                  '/opt/local/include',
-                  '/usr/include',
-                  os.path.abspath(os.path.join(here, '..', 'pe-parser-library', 'include'))]
-  LIBRARY_DIRS = ['/usr/lib',
-                  '/usr/local/lib']
-  COMPILE_ARGS = ["-std=c++11", "-g", "-O0"] # Debug only
-
-extension_mod = Extension('pepy',
-                          sources = SOURCE_FILES,
-                          extra_compile_args = COMPILE_ARGS,
-                          language='c++',
-                          include_dirs = INCLUDE_DIRS,
-                          library_dirs = LIBRARY_DIRS)
-
-
-setup (name = 'pepy',
-       version = '0.1',
-       description = 'python bindings for pe-parse',
-       author = 'Wesley Shields',
-       author_email = 'wxs@atarininja.org',
-       license = 'BSD',
-       long_description = 'Python bindings for pe-parse',
-       ext_modules = [extension_mod])
--- a/python/test.py
+++ b/python/test.py
@ -1,99 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import time
-import pepy
-import binascii
-
-from hashlib import md5
-
-try:
-    p = pepy.parse(sys.argv[1])
-except pepy.error as e:
-    print e
-    sys.exit(1)
-
-print "Magic: %s" % hex(p.magic)
-print "Signature: %s" % hex(p.signature)
-print "Machine: %s (%s)" % (hex(p.machine), p.get_machine_as_str())
-print "Number of sections: %s" % p.numberofsections
-print "Number of symbols: %s" % p.numberofsymbols
-print "Characteristics: %s" % hex(p.characteristics)
-print "Timedatestamp: %s" % time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.timedatestamp))
-print "Major linker version: %s" % hex(p.majorlinkerver)
-print "Minor linker version: %s" % hex(p.minorlinkerver)
-print "Size of code: %s" % hex(p.codesize)
-print "Size of initialized data: %s" % hex(p.initdatasize)
-print "Size of uninitialized data: %s" % hex(p.uninitdatasize)
-print "Address of entry point: %s" % hex(p.entrypointaddr)
-print "Base address of code: %s" % hex(p.baseofcode)
-try:
-    print "Base address of data: %s" % hex(p.baseofdata)
-except:
-    # Not available on PE32+, ignore it.
-    pass
-print "Image base address: %s" % hex(p.imagebase)
-print "Section alignment: %s" % hex(p.sectionalignement)
-print "File alignment: %s" % hex(p.filealignment)
-print "Major OS version: %s" % hex(p.majorosver)
-print "Minor OS version: %s" % hex(p.minorosver)
-print "Win32 version: %s" % hex(p.win32ver)
-print "Size of image: %s" % hex(p.imagesize)
-print "Size of headers: %s" % hex(p.headersize)
-print "Checksum: %s" % hex(p.checksum)
-print "Subsystem: %s (%s)" % (hex(p.subsystem), p.get_subsystem_as_str())
-print "DLL characteristics: %s" % hex(p.dllcharacteristics)
-print "Size of stack reserve: %s" % hex(p.stackreservesize)
-print "Size of stack commit: %s" % hex(p.stackcommitsize)
-print "Size of heap reserve: %s" % hex(p.heapreservesize)
-print "Size of heap commit: %s" % hex(p.heapcommitsize)
-print "Loader flags: %s" % hex(p.loaderflags)
-print "Number of RVA and sizes: %s" % hex(p.rvasandsize)
-ep = p.get_entry_point()
-byts = p.get_bytes(ep, 8)
-print "Bytes at %s: %s" % (hex(ep), ' '.join(['0x' + binascii.hexlify(b) for b in str(byts)]))
-sections = p.get_sections()
-print "Sections: (%i)" % len(sections)
-for sect in sections:
-    print "[+] %s" % sect.name
-    print "\tBase: %s" % hex(sect.base)
-    print "\tLength: %s" % sect.length
-    print "\tVirtual address: %s" % hex(sect.virtaddr)
-    print "\tVirtual size: %i" % sect.virtsize
-    print "\tNumber of Relocations: %i" % sect.numrelocs
-    print "\tNumber of Line Numbers: %i" % sect.numlinenums
-    print "\tCharacteristics: %s" % hex(sect.characteristics)
-    if sect.length:
-        print "\tFirst 10 bytes: 0x%s" % binascii.hexlify(sect.data[:10])
-    print "\tMD5: %s" % md5(sect.data).hexdigest()
-imports = p.get_imports()
-print "Imports: (%i)" % len(imports)
-for imp in imports:
-    print "[+] Symbol: %s (%s %s)" % (imp.sym, imp.name, hex(imp.addr))
-exports = p.get_exports()
-print "Exports: (%i)" % len(exports)
-for exp in exports:
-    print "[+] Module: %s (%s %s)" % (exp.mod, exp.func, hex(exp.addr))
-relocations = p.get_relocations()
-print "Relocations: (%i)" % len(relocations)
-for reloc in relocations:
-    print "[+] Type: %s (%s)" % (reloc.type, hex(reloc.addr))
-resources = p.get_resources()
-print "Resources: (%i)" % len(resources)
-for resource in resources:
-    print "[+] MD5: (%i) %s" % (len(resource.data), md5(resource.data).hexdigest())
-    if resource.type_str:
-        print "\tType string: %s" % resource.type_str
-    else:
-        print "\tType: %s (%s)" % (hex(resource.type), resource.type_as_str())
-    if resource.name_str:
-        print "\tName string: %s" % resource.name_str
-    else:
-        print "\tName: %s" % hex(resource.name)
-    if resource.lang_str:
-        print "\tLang string: %s" % resource.lang_str
-    else:
-        print "\tLang: %s" % hex(resource.lang)
-    print "\tCodepage: %s" % hex(resource.codepage)
-    print "\tRVA: %s" % hex(resource.RVA)
-    print "\tSize: %s" % hex(resource.size)
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,101 @@
+# Copyright (c) 2013, Wesley Shields <wxs@atarininja.org>. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+from setuptools import setup, Extension
+import os
+import sys
+import platform
+
+here = os.path.dirname(__file__)
+pepy = os.path.join(here, "pepy")
+
+
+with open(os.path.join(pepy, "README.md")) as f:
+    README = f.read()
+
+with open(os.path.join(here, "VERSION")) as f:
+    VERSION = f.read().strip()
+
+SOURCE_FILES = [  # compiled on every platform; one more file is appended per OS
+    os.path.join(pepy, "pepy.cpp"),
+    os.path.join(here, "pe-parser-library", "src", "parse.cpp"),
+    os.path.join(here, "pe-parser-library", "src", "buffer.cpp"),
+]
+
+INCLUDE_DIRS = []
+LIBRARY_DIRS = []
+
+if platform.system() == "Windows":  # Windows build: "/"-style compiler flags
+    SOURCE_FILES.append(
+        os.path.join(here, "pe-parser-library", "src", "unicode_winapi.cpp")
+    )
+    INCLUDE_DIRS += [
+        os.path.abspath(os.path.join(os.path.dirname(sys.executable), "include")),
+        os.path.join(here, "pe-parser-library", "include"),
+        "C:\\usr\\include",
+    ]
+    LIBRARY_DIRS += [
+        os.path.abspath(os.path.join(os.path.dirname(sys.executable), "libs")),
+        "C:\\usr\\lib",
+    ]
+    COMPILE_ARGS = [
+        "/EHsc",
+        f'/D"PEPARSE_VERSION=\\"{VERSION}\\""',  # macro value is the quoted version
+    ]
+else:  # every other OS: "-"-style compiler flags
+    SOURCE_FILES.append(
+        os.path.join(here, "pe-parser-library", "src", "unicode_codecvt.cpp")
+    )
+    INCLUDE_DIRS += [
+        "/usr/local/include",
+        "/opt/local/include",
+        "/usr/include",
+        os.path.join(here, "pe-parser-library", "include"),
+    ]
+    LIBRARY_DIRS += ["/usr/lib", "/usr/local/lib"]
+    COMPILE_ARGS = ["-std=c++11", f'-DPEPARSE_VERSION="{VERSION}"']
+
+extension_mod = Extension(  # the one native module, built from the lists above
+    "pepy",
+    sources=SOURCE_FILES,
+    extra_compile_args=COMPILE_ARGS,
+    language="c++",
+    include_dirs=INCLUDE_DIRS,
+    library_dirs=LIBRARY_DIRS,
+)
+
+setup(
+    name="pepy",
+    url="https://github.com/trailofbits/pe-parse",
+    python_requires=">= 3.6",  # this script itself uses f-strings (3.6+)
+    version=VERSION,  # stripped contents of the top-level VERSION file
+    description="Python bindings for pe-parse",
+    long_description=README,  # full text of pepy/README.md
+    long_description_content_type="text/markdown",
+    author="Wesley Shields",
+    author_email="wxs@atarininja.org",
+    license="BSD",
+    ext_modules=[extension_mod],
+)
--- a/test/assets/example.exe
+++ b/test/assets/example.exe
--- a/python/test_python3.py
+++ b/python/test_python3.py
--- a/util/release
+++ b/util/release
@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+# release: perform the chore work required for a pe-parse/pepy release
+
+set -eo pipefail
+
+function installed {
+  # Succeed iff ${1} resolves (via `command -v`) to a regular file on disk.
+  local cmd  # function-scoped, so the lookup result does not leak a global
+  cmd=$(command -v "${1}")
+  [[ -n "${cmd}" ]] && [[ -f "${cmd}" ]]  # this test's status is the return value
+}
+
+function die {
+  printf 'Barf: %s\n' "${*}" 1>&2  # message goes to stderr, not stdout
+  exit 1
+}
+
+# Fail early if we don't have the expected tools.
+installed git || die "Missing dependency: git"
+
+# Fail early if `git status` reports any uncommitted or untracked changes.
+[[ -n $(git status -s) ]] && die "Untracked changes in repo"
+
+# Next, read the version from VERSION and make sure it doesn't already have a git tag.
+[[ -f ./VERSION ]] || die "Missing VERSION file; wrong directory?"
+version=v$(<./VERSION)
+[[ -n $(git tag -l "${version}") ]] && die "git tag for ${version} already exists!"
+
+# Next, craft a tag for the current HEAD. Push both the current commit and the tag.
+git tag "${version}"
+git push
+git push origin "${version}"
+
+echo OK