pe-parser-library: Use WinAPI for UTF-16 to UTF-8 (#130)

* pe-parser-library: Use WinAPI for UTF-16 to UTF-8

If ICU isn't available and we're on C++17 or later, use
the Windows API for Unicode conversion instead of codecvt.
This commit is contained in:
William Woodruff 2020-04-14 10:20:16 -04:00 committed by GitHub
parent 8adf31ac97
commit 8736072cc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 25 deletions

View File

@ -10,6 +10,23 @@ on:
- cron: '0 12 * * *'
jobs:
# Lint job. Steps, in order:
#   1. check out the repository;
#   2. `deps`: refresh the apt package index and install clang-format-9;
#   3. `lint`: configure the project in ./build, build the CMake `format`
#      target, then run `git diff --exit-code` from the repository root, which
#      exits non-zero (failing the job) if the working tree differs from the
#      index.
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: deps
run: |
sudo apt-get update
sudo apt-get install -y clang-format-9
- name: lint
run: |
mkdir build && cd build
cmake ..
cmake --build . --target format
cd .. && git diff --exit-code
pe-parse:
strategy:
matrix:

View File

@ -26,6 +26,16 @@ if (BUILD_COMMAND_LINE_TOOLS)
add_subdirectory(dump-pe)
endif ()
# `format` target: runs clang-format in place (`-i`) over every *.cpp and *.h
# file found recursively under this directory, taking the style from a
# `.clang-format` file (`-style=file`).
file(GLOB_RECURSE PEPARSE_ALL_SOURCES *.cpp *.h)

# A build directory nested inside the source tree (e.g. `mkdir build && cd build
# && cmake ..`) contains CMake-generated sources that the recursive glob above
# also matches. Drop everything located under the build tree so that only
# project sources are reformatted. The filter is skipped when the build tree
# contains this source directory itself (in-source build), because every file
# would then be excluded.
string(FIND "${CMAKE_CURRENT_SOURCE_DIR}/" "${CMAKE_BINARY_DIR}/" PEPARSE_IN_SOURCE_BUILD_POS)
if (NOT PEPARSE_IN_SOURCE_BUILD_POS EQUAL 0)
  set(PEPARSE_FORMAT_SOURCES "")
  foreach (PEPARSE_SOURCE IN LISTS PEPARSE_ALL_SOURCES)
    # Position 0 means the path starts with the build directory prefix.
    string(FIND "${PEPARSE_SOURCE}" "${CMAKE_BINARY_DIR}/" PEPARSE_BUILD_DIR_POS)
    if (NOT PEPARSE_BUILD_DIR_POS EQUAL 0)
      list(APPEND PEPARSE_FORMAT_SOURCES "${PEPARSE_SOURCE}")
    endif ()
  endforeach ()
  set(PEPARSE_ALL_SOURCES ${PEPARSE_FORMAT_SOURCES})
endif ()

add_custom_target(
  format
  COMMAND clang-format -i -style=file ${PEPARSE_ALL_SOURCES}
  WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
  COMMENT "Auto-format the codebase with clang-format"
  VERBATIM
)
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Build Shared: ${BUILD_SHARED_LIBS} ${BUILD_SHARED_LIBS_MESSAGE}")
message(STATUS "Build Command Line Tools: ${BUILD_COMMAND_LINE_TOOLS}")

View File

@ -243,7 +243,9 @@ bool convertAddress(ParsedPeRef &pe,
result);
}
default: { return false; }
default: {
return false;
}
}
}

View File

@ -29,26 +29,30 @@ list(APPEND PEPARSERLIB_SOURCEFILES
# Check for codecvt support. Likely the proper way to do this would be to
# use CMake system inspection via methods like "try_compile" to determine
# if the "#include <codecvt>" directive compiles successfully.
if (MSVC)
if (MSVC_VERSION LESS 1900)
set(CODECVT_SUPPORTED OFF)
if(CXX_STANDARD GREATER_EQUAL 17)
set(CODECVT_SUPPORTED OFF)
else()
if (MSVC)
if (MSVC_VERSION LESS 1900)
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
else ()
set(CODECVT_SUPPORTED ON)
find_path(CODECVT_INCLUDE_DIR NAMES "codecvt")
if (CODECVT_INCLUDE_DIR)
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
else ()
find_path(CODECVT_INCLUDE_DIR NAMES "codecvt")
if (CODECVT_INCLUDE_DIR)
set(CODECVT_SUPPORTED OFF)
else ()
set(CODECVT_SUPPORTED ON)
endif ()
endif ()
endif()
if(${UNICODE_LIBRARY} MATCHES "icu")
find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED)
@ -64,6 +68,9 @@ else()
if(ICU_FOUND)
add_definitions(-DUSE_ICU4C)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp)
elseif(MSVC)
add_definitions(-DUSE_STRINGAPISET)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_winapi.cpp)
elseif(CODECVT_SUPPORTED)
list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp)
else()

View File

@ -32,7 +32,7 @@ THE SOFTWARE.
#include "to_string.h"
#ifdef _MSC_VER
#define __typeof__(x) std::remove_reference < decltype(x)> ::type
#define __typeof__(x) std::remove_reference<decltype(x)>::type
#endif
#define PE_ERR(x) \
@ -198,7 +198,10 @@ typedef int (*iterRsrc)(void *, const resource &);
void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd);
// iterate over the imports by RVA and string
typedef int (*iterVAStr)(void *, const VA &, const std::string &, const std::string &);
typedef int (*iterVAStr)(void *,
const VA &,
const std::string &,
const std::string &);
void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd);
// iterate over relocations in the PE file
@ -216,12 +219,18 @@ typedef int (*iterSymbol)(void *,
void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd);
// iterate over the exports
typedef int (*iterExp)(void *, const VA &, const std::string &, const std::string &);
typedef int (*iterExp)(void *,
const VA &,
const std::string &,
const std::string &);
void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd);
// iterate over sections
typedef int (*iterSec)(
void *, const VA &, const std::string &, const image_section_header &, const bounded_buffer *);
typedef int (*iterSec)(void *,
const VA &,
const std::string &,
const image_section_header &,
const bounded_buffer *);
void IterSec(parsed_pe *pe, iterSec cb, void *cbd);
// get byte at VA in PE

View File

@ -3,9 +3,11 @@
#include <sstream>
#include <string>
#ifdef USE_ICU4C
#if defined(USE_ICU4C)
#include <unicode/unistr.h>
typedef std::basic_string<UChar> UCharString;
#elif defined(USE_STRINGAPISET)
typedef std::basic_string<wchar_t> UCharString;
#else
typedef std::u16string UCharString;
#endif

View File

@ -0,0 +1,56 @@
/*
The MIT License (MIT)
Copyright (c) 2020 Trail of Bits, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <Windows.h>
#include <parser-library/to_string.h>
namespace peparse {
// Converts the UTF-16 string `u` to UTF-8 with the Windows API
// (WideCharToMultiByte using the CP_UTF8 code page).
//
// Returns the converted bytes. Returns an empty string when the size query
// reports no output (which includes API failure) or when the conversion
// itself fails.
std::string from_utf16(const UCharString &u) {
  std::string result;
  const int input_length = static_cast<int>(u.size());

  // First pass: a null output buffer with size 0 asks the API for the number
  // of bytes the conversion needs. The API returns an `int`; keep it as one so
  // the `<= 0` check below is meaningful.
  const int required = WideCharToMultiByte(CP_UTF8,
                                           0,
                                           u.data(),
                                           input_length,
                                           nullptr,
                                           0,
                                           nullptr,
                                           nullptr);

  if (required <= 0) {
    return result;
  }

  // resize(), not reserve(): reserve() only grows capacity and leaves size()
  // at 0, so bytes written into the buffer would not be part of the returned
  // string (and writing past size() is undefined behaviour).
  result.resize(static_cast<std::size_t>(required));

  // Second pass: perform the conversion into the string's own storage.
  const int written = WideCharToMultiByte(CP_UTF8,
                                          0,
                                          u.data(),
                                          input_length,
                                          &result[0],
                                          required,
                                          nullptr,
                                          nullptr);

  // Keep only what was actually produced; a failed call (0) yields "".
  result.resize(written > 0 ? static_cast<std::size_t>(written) : 0);

  return result;
}
} // namespace peparse