From 8736072cc1938db29b07d203333b98d2b4dbc67b Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 14 Apr 2020 10:20:16 -0400 Subject: [PATCH] pe-parser-library: Use WinAPI for UTF-16 to UTF-8 (#130) * pe-parser-library: Use WinAPI for UTF-16 to UTF-8 If ICU isn't available and we're on C++17 or later, use the Windows API for Unicode conversion instead of codecvt. --- .github/workflows/ci.yml | 17 ++++++ CMakeLists.txt | 10 ++++ examples/peaddrconv/main.cpp | 4 +- pe-parser-library/CMakeLists.txt | 43 ++++++++------ .../include/parser-library/parse.h | 19 +++++-- .../include/parser-library/to_string.h | 4 +- pe-parser-library/src/unicode_winapi.cpp | 56 +++++++++++++++++++ 7 files changed, 128 insertions(+), 25 deletions(-) create mode 100644 pe-parser-library/src/unicode_winapi.cpp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6ec3da6..0146434 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,23 @@ on: - cron: '0 12 * * *' jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: deps + run: | + sudo apt-get update + sudo apt-get install -y clang-format-9 + + - name: lint + run: | + mkdir build && cd build + cmake .. + cmake --build . --target format + cd .. && git diff --exit-code + pe-parse: strategy: matrix: diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d2b11e..da840a0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,16 @@ if (BUILD_COMMAND_LINE_TOOLS) add_subdirectory(dump-pe) endif () +# `format` target. 
+file(GLOB_RECURSE PEPARSE_ALL_SOURCES *.cpp *.h) +add_custom_target( + format + COMMAND clang-format -i -style=file ${PEPARSE_ALL_SOURCES} + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Auto-format the codebase with clang-format" + VERBATIM +) + message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") message(STATUS "Build Shared: ${BUILD_SHARED_LIBS} ${BUILD_SHARED_LIBS_MESSAGE}") message(STATUS "Build Command Line Tools: ${BUILD_COMMAND_LINE_TOOLS}") diff --git a/examples/peaddrconv/main.cpp b/examples/peaddrconv/main.cpp index 3bb0741..437379e 100644 --- a/examples/peaddrconv/main.cpp +++ b/examples/peaddrconv/main.cpp @@ -243,7 +243,9 @@ bool convertAddress(ParsedPeRef &pe, result); } - default: { return false; } + default: { + return false; + } } } diff --git a/pe-parser-library/CMakeLists.txt b/pe-parser-library/CMakeLists.txt index 8227420..2bcba55 100644 --- a/pe-parser-library/CMakeLists.txt +++ b/pe-parser-library/CMakeLists.txt @@ -29,26 +29,30 @@ list(APPEND PEPARSERLIB_SOURCEFILES # Check for codecvt support. Likely the proper way to do this would be to # use CMake system inspection via methods like "try_compile" to determine # if the "#include " directive compiles successfully. 
-if (MSVC) - if (MSVC_VERSION LESS 1900) - set(CODECVT_SUPPORTED OFF) +if(CXX_STANDARD GREATER_EQUAL 17) + set(CODECVT_SUPPORTED OFF) +else() + if (MSVC) + if (MSVC_VERSION LESS 1900) + set(CODECVT_SUPPORTED OFF) + else () + set(CODECVT_SUPPORTED ON) + endif () + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") + set(CODECVT_SUPPORTED OFF) + else () + set(CODECVT_SUPPORTED ON) + endif () else () - set(CODECVT_SUPPORTED ON) + find_path(CODECVT_INCLUDE_DIR NAMES "codecvt") + if (CODECVT_INCLUDE_DIR) + set(CODECVT_SUPPORTED OFF) + else () + set(CODECVT_SUPPORTED ON) + endif () endif () -elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") - set(CODECVT_SUPPORTED OFF) - else () - set(CODECVT_SUPPORTED ON) - endif () -else () - find_path(CODECVT_INCLUDE_DIR NAMES "codecvt") - if (CODECVT_INCLUDE_DIR) - set(CODECVT_SUPPORTED OFF) - else () - set(CODECVT_SUPPORTED ON) - endif () -endif () +endif() if(${UNICODE_LIBRARY} MATCHES "icu") find_package(ICU ${ICU_MINIMUM_REQUIRED} COMPONENTS uc REQUIRED) @@ -64,6 +68,9 @@ else() if(ICU_FOUND) add_definitions(-DUSE_ICU4C) list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_icu.cpp) + elseif(MSVC) + add_definitions(-DUSE_STRINGAPISET) + list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_winapi.cpp) elseif(CODECVT_SUPPORTED) list(APPEND PEPARSERLIB_SOURCEFILES src/unicode_codecvt.cpp) else() diff --git a/pe-parser-library/include/parser-library/parse.h b/pe-parser-library/include/parser-library/parse.h index 2e839df..5cb4616 100644 --- a/pe-parser-library/include/parser-library/parse.h +++ b/pe-parser-library/include/parser-library/parse.h @@ -32,7 +32,7 @@ THE SOFTWARE. 
#include "to_string.h" #ifdef _MSC_VER -#define __typeof__(x) std::remove_reference < decltype(x)> ::type +#define __typeof__(x) std::remove_reference::type #endif #define PE_ERR(x) \ @@ -198,7 +198,10 @@ typedef int (*iterRsrc)(void *, const resource &); void IterRsrc(parsed_pe *pe, iterRsrc cb, void *cbd); // iterate over the imports by RVA and string -typedef int (*iterVAStr)(void *, const VA &, const std::string &, const std::string &); +typedef int (*iterVAStr)(void *, + const VA &, + const std::string &, + const std::string &); void IterImpVAString(parsed_pe *pe, iterVAStr cb, void *cbd); // iterate over relocations in the PE file @@ -216,12 +219,18 @@ typedef int (*iterSymbol)(void *, void IterSymbols(parsed_pe *pe, iterSymbol cb, void *cbd); // iterate over the exports -typedef int (*iterExp)(void *, const VA &, const std::string &, const std::string &); +typedef int (*iterExp)(void *, + const VA &, + const std::string &, + const std::string &); void IterExpVA(parsed_pe *pe, iterExp cb, void *cbd); // iterate over sections -typedef int (*iterSec)( - void *, const VA &, const std::string &, const image_section_header &, const bounded_buffer *); +typedef int (*iterSec)(void *, + const VA &, + const std::string &, + const image_section_header &, + const bounded_buffer *); void IterSec(parsed_pe *pe, iterSec cb, void *cbd); // get byte at VA in PE diff --git a/pe-parser-library/include/parser-library/to_string.h b/pe-parser-library/include/parser-library/to_string.h index a5fc1d2..23e16fb 100644 --- a/pe-parser-library/include/parser-library/to_string.h +++ b/pe-parser-library/include/parser-library/to_string.h @@ -3,9 +3,11 @@ #include #include -#ifdef USE_ICU4C +#if defined(USE_ICU4C) #include typedef std::basic_string UCharString; +#elif defined(USE_STRINGAPISET) +typedef std::basic_string UCharString; #else typedef std::u16string UCharString; #endif diff --git a/pe-parser-library/src/unicode_winapi.cpp b/pe-parser-library/src/unicode_winapi.cpp new file 
mode 100644
index 0000000..6160f2a
--- /dev/null
+++ b/pe-parser-library/src/unicode_winapi.cpp
@@ -0,0 +1,73 @@
+/*
+The MIT License (MIT)
+
+Copyright (c) 2020 Trail of Bits, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <Windows.h>
+#include <parser-library/to_string.h>
+
+namespace peparse {
+// Converts the UTF-16 string `u` to UTF-8 via WideCharToMultiByte.
+//
+// Returns the UTF-8 encoded bytes, or an empty string when `u` is empty or
+// the conversion fails.
+std::string from_utf16(const UCharString &u) {
+  std::string result;
+
+  // A zero-length input is rejected by WideCharToMultiByte; short-circuit it.
+  if (u.empty()) {
+    return result;
+  }
+
+  // First pass: ask how many UTF-8 bytes are needed. The count stays a signed
+  // int (the API's return type) so the failure check below is meaningful.
+  const int size = WideCharToMultiByte(CP_UTF8,
+                                       0,
+                                       u.data(),
+                                       static_cast<int>(u.size()),
+                                       nullptr,
+                                       0,
+                                       nullptr,
+                                       nullptr);
+
+  if (size <= 0) {
+    return result;
+  }
+
+  // resize(), not reserve(): the string's length has to cover the bytes the
+  // second pass writes, otherwise the caller gets back an empty string.
+  result.resize(static_cast<std::size_t>(size));
+  const int written = WideCharToMultiByte(CP_UTF8,
+                                          0,
+                                          u.data(),
+                                          static_cast<int>(u.size()),
+                                          &result[0],
+                                          size,
+                                          nullptr,
+                                          nullptr);
+
+  // Keep only the bytes actually produced; a failed second pass yields "".
+  result.resize(written > 0 ? static_cast<std::size_t>(written) : 0);
+
+  return result;
+}
+} // namespace peparse