Merge remote-tracking branch 'r/master'

2025-05-08 09:39:34 +00:00 · 2019-10-16 12:01:56 +03:00 · 2019-10-16 12:01:56 +03:00 · b69a5fdcbb
commit b69a5fdcbb
parent 7138bb218a 1544c61c38
13 changed files with 397 additions and 17 deletions
--- a/README.md
+++ b/README.md
@ -45,6 +45,12 @@ cmake --build . --config Release
 cmake --build . --config Release --target install
 ```

+PE files can have a Resource section whose Type strings are encoded in UTF-16, whereas `std::string` is expected to hold UTF-8, so a cross-platform conversion
+library is needed. You can let cmake choose one it finds in your build environment, or you can choose one of the following options yourself and specify it with
+the `-DUNICODE_LIBRARY` argument when generating the project files with cmake:
+* `icu` (preferred) - "[ICU](http://site.icu-project.org/) is a mature, widely used set of C/C++ and Java libraries providing Unicode and Globalization support for software applications"
+* `codecvt` - A C++ library header file ([now deprecated](http://open-std.org/JTC1/SC22/WG21/docs/papers/2017/p0618r0.html)) supported by some C++ runtimes
+
 ### Notes about Windows

 If you are building on Windows with Visual Studio, the generator option can be used to select the compiler version and the output architecture:
@ -57,6 +63,11 @@ cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release ..
 cmake -G "Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=Release ..
 ```

+Visual Studio 2015 or higher is required to use codecvt, but you also have the option of using [ICU](http://site.icu-project.org/). The easiest way to
+get started with ICU in Windows is with [vcpkg](https://vcpkg.readthedocs.io/): `vcpkg install icu`. Then add the
+`-DCMAKE_TOOLCHAIN_FILE=C:\src\vcpkg\scripts\buildsystems\vcpkg.cmake` argument when generating the project files with cmake to add the appropriate
+library and include directories to the project.
+
 Using the library
 =======
 Once the library is installed, linking to it is easy! Add the following lines in your CMake project:
--- a/cmake/compilation_flags.cmake
+++ b/cmake/compilation_flags.cmake
@ -11,6 +11,7 @@ if (MSVC)

 else ()
  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)

  if (NOT MINGW)
--- a/dump-pe/main.cpp
+++ b/dump-pe/main.cpp
@ -227,6 +227,17 @@ int printSymbols(void *N,
  return 0;
 }

+// IterRich callback: prints a single Rich header entry to stdout as one line
+// of right-aligned label/value columns (product id, build number, resolved
+// product name, use count). The user-data pointer is not used.
+// Always returns 0.
+int printRich(void *N, rich_entry r) {
+  static_cast<void>(N);
+
+  const auto &productName = GetRichProductName(r.ProductId, r.BuildNumber);
+
+  std::cout << std::setw(10) << "ProdId:" << std::setw(7) << r.ProductId
+            << std::setw(10) << "Build:" << std::setw(7) << r.BuildNumber
+            << std::setw(10) << "Name:" << std::setw(20) << productName
+            << std::setw(10) << "Count:" << std::setw(7) << r.Count << "\n";
+  return 0;
+}
+
 int printRsrc(void *N, resource r) {
  static_cast<void>(N);

@ -288,6 +299,13 @@ int main(int argc, char *argv[]) {
  parsed_pe *p = ParsePEFromFile(argv[1]);

  if (p != NULL) {
+    // Print Rich header info
+    if(p->peHeader.rich.isPresent) {
+      std::cout << "Rich header: present\n";
+      IterRich(p, printRich, NULL);
+    } else {
+      std::cout << "Rich header: not present\n";
+    }
    // print out some things
    DUMP_FIELD(Signature);
    DUMP_FIELD(FileHeader.Machine);
--- a/pe-parser-library/CMakeLists.txt
+++ b/pe-parser-library/CMakeLists.txt
@ -1,6 +1,15 @@
 cmake_minimum_required(VERSION 3.7)
 project(pe-parser-library)

+set(UNICODE_LIBRARY "any" CACHE STRING "Select a unicode library")
+set_property(CACHE UNICODE_LIBRARY PROPERTY STRINGS "any" "icu" "codecvt")
+
+# This variable is used twice so setting once at the top here to prevent
+# the chance they get out of sync.
+# This is the minimum "required" version but there's a good chance early
+# versions of ICU support the simple functionality needed by this project.
+set(ICU_MINIMUM_REQUIRED 55.0)
+
 # List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...)
 list(APPEND PEPARSERLIB_SOURCEFILES
  include/parser-library/parse.h
@ -11,9 +20,57 @@ list(APPEND PEPARSERLIB_SOURCEFILES
  src/parse.cpp
 )

+# Check for codecvt support. Likely the proper way to do this would be to
+# use CMake system inspection via methods like "try_compile" to determine
+# if the "#include <codecvt>" directive compiles successfully.
+if (MSVC)
+  # MSVC_VERSION 1900 is the lowest version treated as providing codecvt.
+  if (MSVC_VERSION LESS 1900)
+    set(CODECVT_SUPPORTED OFF)
+  else ()
+    set(CODECVT_SUPPORTED ON)
+  endif ()
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  # GCC releases older than 5.0 are treated as lacking codecvt.
+  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+    set(CODECVT_SUPPORTED OFF)
+  else ()
+    set(CODECVT_SUPPORTED ON)
+  endif ()
+else ()
+  # NOTE(review): this branch looks inverted - CODECVT_SUPPORTED is switched
+  # OFF when find_path locates a "codecvt" header and ON when it does not.
+  # Confirm whether that is intentional before changing or relying on it.
+  find_path(CODECVT_INCLUDE_DIR NAMES "codecvt")
+  if (CODECVT_INCLUDE_DIR)
+    set(CODECVT_SUPPORTED OFF)
+  else ()
+    set(CODECVT_SUPPORTED ON)
+  endif ()
+endif ()
+
+if(${UNICODE_LIBRARY} MATCHES "icu")
+  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED)
+  add_definitions(-DUSE_ICU4C)
+  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
+elseif(${UNICODE_LIBRARY} MATCHES "codecvt")
+  if(NOT CODECVT_SUPPORTED)
+    message(SEND_ERROR "codecvt header not found")
+  endif()
+  list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
+else()
+  find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc)
+  if(ICU_FOUND)
+    add_definitions(-DUSE_ICU4C)
+    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
+  elseif(CODECVT_SUPPORTED)
+    list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
+  else()
+    message(SEND_ERROR "unable to find codecvt header or ICU library (hint: try installing libicu-dev)")
+  endif(ICU_FOUND)
+endif()
+
 add_library(${PROJECT_NAME} ${PEPARSERLIB_SOURCEFILES})
 target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_compile_options(${PROJECT_NAME} PRIVATE ${GLOBAL_CXXFLAGS})
+if(ICU_FOUND)
+  target_link_libraries(${PROJECT_NAME} ICU::uc)
+endif()

 install(TARGETS ${PROJECT_NAME}
  RUNTIME DESTINATION "bin"
--- a/pe-parser-library/include/parser-library/nt-headers.h
+++ b/pe-parser-library/include/parser-library/nt-headers.h
@ -26,6 +26,7 @@ THE SOFTWARE.

 #include <cstdint>
 #include <string>
+#include <vector>

 #define _offset(t, f)         \
  static_cast<std::uint32_t>( \
@ -36,6 +37,9 @@ THE SOFTWARE.
 // some constant definitions
 // clang-format off
 namespace peparse {
+constexpr std::uint32_t RICH_MAGIC_END = 0x68636952;
+constexpr std::uint32_t RICH_MAGIC_START = 0x536e6144;
+constexpr std::uint32_t RICH_OFFSET = 0x80;
 constexpr std::uint16_t MZ_MAGIC = 0x5A4D;
 constexpr std::uint32_t NT_MAGIC = 0x00004550;
 constexpr std::uint16_t NUM_DIR_ENTRIES = 16;
@ -193,7 +197,7 @@ constexpr std::uint16_t IMAGE_SYM_DTYPE_FUNCTION = 2;
 constexpr std::uint16_t IMAGE_SYM_DTYPE_ARRAY = 3;

 // Symbol table storage classes
-constexpr std::uint8_t IMAGE_SYM_CLASS_END_OF_FUNCTION = static_cast<const std::uint8_t>(-1);
+constexpr std::uint8_t IMAGE_SYM_CLASS_END_OF_FUNCTION = static_cast<std::uint8_t>(-1);
 constexpr std::uint8_t IMAGE_SYM_CLASS_NULL = 0;
 constexpr std::uint8_t IMAGE_SYM_CLASS_AUTOMATIC = 1;
 constexpr std::uint8_t IMAGE_SYM_CLASS_EXTERNAL = 2;
@ -338,6 +342,20 @@ struct nt_header_32 {
  std::uint16_t OptionalMagic;
 };

+// One decoded Rich header record. readRichHeader builds each record from two
+// consecutive XOR-decrypted DWORDs.
+struct rich_entry {
+  // High 16 bits of the record's first decrypted DWORD.
+  std::uint16_t ProductId;
+  // Low 16 bits of the record's first decrypted DWORD.
+  std::uint16_t BuildNumber;
+  // The record's second decrypted DWORD (use count).
+  std::uint32_t Count;
+};
+
+// Parsed Rich header of a PE file, as filled in by readRichHeader.
+struct rich_header {
+  // First DWORD after decryption; stored once it matches RICH_MAGIC_START.
+  std::uint32_t StartSignature;
+  // Decoded records, in the order they were read from the header.
+  std::vector<rich_entry> Entries;
+  // Last DWORD of the header buffer, read as-is (not decrypted) and compared
+  // against RICH_MAGIC_END.
+  std::uint32_t EndSignature;
+  // XOR key that was used to decrypt the header DWORDs.
+  std::uint32_t DecryptionKey;
+  // True once the start signature has been matched; false when it was not
+  // matched or when no end signature was located in the file.
+  bool isPresent;
+};
+
 /*
 * This structure is only used to know how far to move the offset
 * when parsing resources. The data is stored in a resource_dir_entry
--- a/pe-parser-library/include/parser-library/parse.h
+++ b/pe-parser-library/include/parser-library/parse.h
@ -25,6 +25,7 @@ THE SOFTWARE.
 #pragma once

 #include <cstdint>
+#include <map>
 #include <string>

 #include "nt-headers.h"
@ -142,6 +143,7 @@ bool readByte(bounded_buffer *b, std::uint32_t offset, std::uint8_t &out);
 bool readWord(bounded_buffer *b, std::uint32_t offset, std::uint16_t &out);
 bool readDword(bounded_buffer *b, std::uint32_t offset, std::uint32_t &out);
 bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out);
+bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out);

 bounded_buffer *readFileToFileBuffer(const char *filePath);
 bounded_buffer *
@ -152,6 +154,7 @@ uint64_t bufLen(bounded_buffer *b);
 struct parsed_pe_internal;

 typedef struct _pe_header {
+  rich_header rich;
  nt_header_32 nt;
 } pe_header;

@ -161,6 +164,11 @@ typedef struct _parsed_pe {
  pe_header peHeader;
 } parsed_pe;

+// Resolve a Rich header product id / build number pair to a known
+// product name
+typedef std::pair<std::uint16_t, std::uint16_t> ProductKey;
+const std::string& GetRichProductName(std::uint16_t prodId, std::uint16_t buildNum);
+
 // get parser error status as integer
 std::uint32_t GetPEErr();

@ -176,6 +184,10 @@ parsed_pe *ParsePEFromFile(const char *filePath);
 // destruct a PE context
 void DestructParsedPE(parsed_pe *p);

+// iterate over Rich header entries
+typedef int (*iterRich)(void *, rich_entry);
+void IterRich(parsed_pe *pe, iterRich cb, void *cbd);
+
 // iterate over the resources
 typedef int (*iterRsrc)(void *, resource);
 void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);
--- a/pe-parser-library/include/parser-library/to_string.h
+++ b/pe-parser-library/include/parser-library/to_string.h
@ -1,7 +1,15 @@
 #pragma once

+#include <string>
 #include <sstream>

+#ifdef USE_ICU4C
+#include <unicode/unistr.h>
+typedef std::basic_string<UChar> UCharString;
+#else
+typedef std::u16string UCharString;
+#endif
+
 namespace peparse {
 template <class T>
 static std::string to_string(T t, std::ios_base &(*f)(std::ios_base &) ) {
@ -9,4 +17,6 @@ static std::string to_string(T t, std::ios_base &(*f)(std::ios_base &) ) {
  oss << f << t;
  return oss.str();
 }
+
+std::string from_utf16(const UCharString &u);
 } // namespace peparse
--- a/pe-parser-library/src/buffer.cpp
+++ b/pe-parser-library/src/buffer.cpp
@ -164,6 +164,31 @@ bool readQword(bounded_buffer *b, std::uint32_t offset, std::uint64_t &out) {
  return true;
 }

+// Reads the 16-bit code unit stored at byte `offset` of `b` into `out`.
+//
+// The two bytes are copied in the order they appear in the buffer, or in
+// reversed order when the buffer's swapBytes flag is set. Returns false and
+// records PEERR_BUFFER when `b` is null, or PEERR_ADDRESS when fewer than two
+// bytes are available at `offset`; `out` is not modified in either case.
+bool readChar16(bounded_buffer *b, std::uint32_t offset, char16_t &out) {
+  if (b == nullptr) {
+    PE_ERR(PEERR_BUFFER);
+    return false;
+  }
+
+  // Phrased so the comparison cannot wrap: `offset + 1` overflows to 0 when
+  // offset is UINT32_MAX, which would let an out-of-range read through.
+  if (b->bufLen < 2 || offset > b->bufLen - 2) {
+    PE_ERR(PEERR_ADDRESS);
+    return false;
+  }
+
+  // Copy byte by byte instead of dereferencing a reinterpret_cast'ed
+  // char16_t pointer: the source address may be unaligned, and accessing
+  // the bytes through a char16_t lvalue violates the aliasing rules.
+  const auto *src = b->buf + offset;
+  char16_t value = 0;
+  auto *dst = reinterpret_cast<unsigned char *>(&value);
+  dst[0] = static_cast<unsigned char>(b->swapBytes ? src[1] : src[0]);
+  dst[1] = static_cast<unsigned char>(b->swapBytes ? src[0] : src[1]);
+  out = value;
+
+  return true;
+}
+
 bounded_buffer *readFileToFileBuffer(const char *filePath) {
 #ifdef _WIN32
  HANDLE h = CreateFileA(filePath,
--- a/pe-parser-library/src/parse.cpp
+++ b/pe-parser-library/src/parse.cpp
@ -122,6 +122,25 @@ struct parsed_pe_internal {
  std::vector<symbol> symbols;
 };

+// The mapping of Rich header product id / build number pairs
+// to strings
+static const std::map<ProductKey, const std::string> ProductMap = {
+  {std::make_pair(1, 0), "Imported Functions"}
+};
+
+static const std::string kUnknownProduct = "<unknown>";
+
+// Look up the name registered in ProductMap for a Rich header
+// (product id, build number) pair. When the pair has no entry the
+// kUnknownProduct placeholder is returned instead.
+const std::string& GetRichProductName(std::uint16_t prodId, std::uint16_t buildNum) {
+  const ProductKey key(prodId, buildNum);
+  const auto match = ProductMap.find(key);
+  return (match == ProductMap.end()) ? kUnknownProduct : match->second;
+}
+
 std::uint32_t err = 0;
 std::string err_loc;

@ -243,6 +262,14 @@ bool getSecForVA(const std::vector<section> &secs, VA v, section &sec) {
  return false;
 }

+// Calls `cb` once for every parsed Rich header entry, in stored order, with
+// `cbd` passed through as the first argument. Iteration ends early as soon
+// as the callback returns a non-zero value.
+void IterRich(parsed_pe *pe, iterRich cb, void *cbd) {
+  const auto &entries = pe->peHeader.rich.Entries;
+  for (auto it = entries.begin(); it != entries.end(); ++it) {
+    if (cb(cbd, *it) != 0) {
+      return;
+    }
+  }
+}
+
 void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
  parsed_pe_internal *pint = pe->internal;

@ -256,19 +283,23 @@ void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd) {
 }

+// Reads the resource string located at offset `id` of `data`: a 16-bit
+// length word followed by that many 16-bit code units. The code units are
+// collected and passed through from_utf16, and the converted string is
+// assigned to `result`. Returns false as soon as any read fails.
 bool parse_resource_id(bounded_buffer *data, std::uint32_t id, std::string &result) {
-  std::uint8_t c;
  std::uint16_t len;
-
  if (!readWord(data, id, len)) {
    return false;
  }
  id += 2;
-  for (std::uint32_t i = 0; i < len * 2U; i++) {
-    if (!readByte(data, id + i, c)) {
+
+  std::uint32_t rawSize = len * 2U;
+  UCharString rawString;
+  for (std::uint32_t i = 0; i < rawSize; i += 2) {
+    char16_t c;
+    if (!readChar16(data, id + i, c)) {
      return false;
    }
-    result.push_back(static_cast<char>(c));
+    rawString.push_back(c);
  }
+
+  result = from_utf16(rawString);
  return true;
 }

@ -794,6 +825,83 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
  return true;
 }

+// Decodes the Rich header contained in `rich_buf` into `rich_hdr`.
+//
+// The buffer is expected to start at the XOR-encrypted "DanS" marker and to
+// end with the plain "Rich" marker; `key` is XORed with every other DWORD to
+// decrypt it. Decoded entries are appended to rich_hdr.Entries.
+//
+// Returns false when `rich_buf` is null, when a read fails (PEERR_READ), when
+// the decrypted first DWORD is not RICH_MAGIC_START (isPresent is then set to
+// false), or when the last DWORD is not RICH_MAGIC_END (PEERR_MAGIC). Once
+// the start marker has matched, isPresent stays true even if a later step
+// fails.
+bool readRichHeader(bounded_buffer *rich_buf, std::uint32_t key, rich_header &rich_hdr) {
+  if (rich_buf == nullptr) {
+    return false;
+  }
+
+  std::uint32_t encrypted_dword;
+  std::uint32_t decrypted_dword;
+
+  // Confirm DanS signature exists first.
+  // The first decrypted DWORD value of the rich header
+  // at offset 0 should be 0x536e6144 aka the "DanS" signature
+  if (!readDword(rich_buf, 0, encrypted_dword)) {
+    PE_ERR(PEERR_READ);
+    return false;
+  }
+
+  decrypted_dword = encrypted_dword ^ key;
+
+  if (decrypted_dword != RICH_MAGIC_START) {
+    // DanS magic not found
+    rich_hdr.isPresent = false;
+    return false;
+  }
+
+  // DanS magic found
+  rich_hdr.isPresent = true;
+  rich_hdr.StartSignature = decrypted_dword;
+
+  // The loop bound below subtracts 8 from an unsigned length. Reject buffers
+  // that are too short up front instead of letting the subtraction wrap and
+  // depending on a failing read to get out of the loop.
+  if (rich_buf->bufLen < 8) {
+    PE_ERR(PEERR_READ);
+    return false;
+  }
+
+  // Iterate over the remaining entries.
+  // Start from buffer offset 16 because after "DanS" there
+  // are three DWORDs of zero padding that can be skipped over.
+  // a DWORD is 4 bytes. Loop is incrementing 8 bytes, however
+  // we are reading two DWORDS at a time, which is the size
+  // of one rich header entry.
+  for (std::uint32_t i = 16; i < rich_buf->bufLen - 8; i += 8) {
+    rich_entry entry;
+    // Read first DWORD of entry and decrypt it
+    if (!readDword(rich_buf, i, encrypted_dword)) {
+      PE_ERR(PEERR_READ);
+      return false;
+    }
+    decrypted_dword = encrypted_dword ^ key;
+    // The high WORD of the first DWORD is the Product ID
+    entry.ProductId = (decrypted_dword & 0xFFFF0000) >> 16;
+    // The low WORD of the first DWORD is the Build Number
+    entry.BuildNumber = (decrypted_dword & 0xFFFF);
+
+    // The second DWORD represents the use count
+    if (!readDword(rich_buf, i + 4, encrypted_dword)) {
+      PE_ERR(PEERR_READ);
+      return false;
+    }
+    decrypted_dword = encrypted_dword ^ key;
+    // The full 32-bit DWORD is the count
+    entry.Count = decrypted_dword;
+
+    // Preserve the individual entry
+    rich_hdr.Entries.push_back(entry);
+  }
+
+  // Preserve the end signature aka "Rich" magic. It is stored unencrypted,
+  // so it is read straight into the header without applying the key.
+  if (!readDword(rich_buf, rich_buf->bufLen - 4, rich_hdr.EndSignature)) {
+    PE_ERR(PEERR_READ);
+    return false;
+  }
+  if (rich_hdr.EndSignature != RICH_MAGIC_END) {
+    PE_ERR(PEERR_MAGIC);
+    return false;
+  }
+
+  // Preserve the decryption key
+  rich_hdr.DecryptionKey = key;
+
+  return true;
+}
+
 bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) {
  if (file == nullptr) {
    return false;
@ -819,6 +927,53 @@ bool getHeader(bounded_buffer *file, pe_header &p, bounded_buffer *&rem) {
  }
  curOffset += offset;

+  // read rich header
+  std::uint32_t dword;
+  std::uint32_t rich_end_signature_offset;
+  std::uint32_t xor_key;
+  bool found_rich = false;
+
+  // Start reading from RICH_OFFSET (0x80), a known Rich header offset.
+  // Note: 0x80 is based on anecdotal evidence.
+  //
+  // Iterate over the DWORDs, hence why i increments 4 bytes at a time.
+  for (std::uint32_t i = RICH_OFFSET; i < offset; i += 4) {
+    if (!readDword(file, i, dword)) {
+      PE_ERR(PEERR_READ);
+      return false;
+    }
+
+    // Found the trailing Rich signature
+    if (dword == RICH_MAGIC_END) {
+      found_rich = true;
+      rich_end_signature_offset = i;
+      break;
+    }
+  }
+
+  if (found_rich) {
+    // Get the XOR decryption key.  It is the DWORD immediately
+    // after the Rich signature.
+    if (!readDword(file, rich_end_signature_offset + 4, xor_key)) {
+      PE_ERR(PEERR_READ);
+      return false;
+    }
+
+    // Split the Rich header out into its own buffer
+    bounded_buffer *richBuf = splitBuffer(file, 0x80, rich_end_signature_offset + 4);
+    if (richBuf == nullptr) {
+      return false;
+    }
+
+    readRichHeader(richBuf, xor_key, p.rich);
+    if (richBuf != nullptr) {
+      deleteBuffer(richBuf);
+    }
+
+  } else {
+    p.rich.isPresent = false;
+  }
+
  // now, we can read out the fields of the NT headers
  bounded_buffer *ntBuf = splitBuffer(file, curOffset, file->bufLen);

--- a/pe-parser-library/src/unicode_codecvt.cpp
+++ b/pe-parser-library/src/unicode_codecvt.cpp
@ -0,0 +1,47 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2019 Trail of Bits, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <parser-library/to_string.h>
+#include <locale>
+#include <codecvt>
+
+namespace peparse {
+// Converts a UTF-16 string to UTF-8.
+//
+// The conversion uses std::codecvt_utf8_utf16 so that surrogate pairs are
+// decoded as UTF-16; std::codecvt_utf8 is a UCS-2 facet for 16-bit elements
+// and does not decode surrogate pairs into a single code point.
+// std::wstring_convert::to_bytes reports a failed conversion by throwing
+// std::range_error.
+// See https://stackoverflow.com/questions/38688417/utf-conversion-functions-in-c11
+std::string from_utf16(const UCharString &u)
+{
+#if defined(_MSC_VER)
+  // std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>convert; // Doesn't compile with Visual Studio.
+  // See https://stackoverflow.com/questions/32055357/visual-studio-c-2015-stdcodecvt-with-char16-t-or-char32-t
+  std::wstring_convert<std::codecvt_utf8_utf16<std::int16_t>, std::int16_t> convert;
+  auto p = reinterpret_cast<const std::int16_t *>(u.data());
+  return convert.to_bytes(p, p + u.size());
+#else
+  // -std=c++11 or -std=c++14
+  // Requires GCC 5 or higher
+  // Requires Clang ??? or higher (tested on Clang 3.8, 5.0, 6.0)
+  std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+  return convert.to_bytes(u);
+#endif
+}
+} // namespace peparse
--- a/pe-parser-library/src/unicode_icu.cpp
+++ b/pe-parser-library/src/unicode_icu.cpp
@ -0,0 +1,36 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2019 Trail of Bits, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <parser-library/to_string.h>
+#include <unicode/unistr.h>
+
+namespace peparse {
+// ICU-backed conversion of a UTF-16 string to UTF-8: the code units are
+// wrapped in an icu::UnicodeString, whose UTF-8 form is appended to an
+// initially empty std::string.
+std::string from_utf16(const UCharString &u)
+{
+  std::string utf8;
+  icu::UnicodeString(u.data(), u.length()).toUTF8String(utf8);
+  return utf8;
+}
+} // namespace peparse
--- a/python/pepy.cpp
+++ b/python/pepy.cpp
@ -885,15 +885,6 @@ int resource_callback(void *cbd, resource r) {
   * The tuple item order is important here. It is passed into the
   * section type initialization and parsed there.
   */
-#if PY_MAJOR_VERSION >= 3
-  tuple = Py_BuildValue("u#u#u#IIIIIIO&",
-                        r.type_str.c_str(),
-                        r.type_str.length() / 2,
-                        r.name_str.c_str(),
-                        r.name_str.length() / 2,
-                        r.lang_str.c_str(),
-                        r.lang_str.length() / 2,
-#else
  tuple = Py_BuildValue("s#s#s#IIIIIIO&",
                        r.type_str.c_str(),
                        r.type_str.length(),
@ -901,7 +892,6 @@ int resource_callback(void *cbd, resource r) {
                        r.name_str.length(),
                        r.lang_str.c_str(),
                        r.lang_str.length(),
-#endif
                        r.type,
                        r.name,
                        r.lang,
--- a/travis.sh
+++ b/travis.sh
@ -57,7 +57,7 @@ linux_initialize() {
  fi

  printf " > Installing the required packages...\n"
-  sudo apt-get install -qqy cmake python2.7 python-dev build-essential realpath > "${log_file}" 2>&1
+  sudo apt-get install -qqy cmake python2.7 python-dev build-essential realpath libicu-dev > "${log_file}" 2>&1
  if [ $? -ne 0 ] ; then
    printf " x Could not install the required dependencies\n\n\n"
    cat "${log_file}"