From 327a524de1a32000ffd5110b024d09ebef22aae0 Mon Sep 17 00:00:00 2001
From: passthecilantro <65034117+passthecilantro@users.noreply.github.com>
Date: Fri, 25 Sep 2020 15:48:28 -0400
Subject: [PATCH] Fix #1115 (#139)

Co-authored-by: William Woodruff <william.woodruff@trailofbits.com>
Co-authored-by: William Woodruff <william@trailofbits.com>
---
 CMakeLists.txt                   |   5 +
 dump-pe/main.cpp                 |  15 +-
 dump-pe/vendor/argh.h            | 463 +++++++++++++++++++++++++++++++
 pe-parser-library/CMakeLists.txt |   4 -
 4 files changed, 480 insertions(+), 7 deletions(-)
 create mode 100644 dump-pe/vendor/argh.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 79155de..ff1f191 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,10 @@ if (MSVC)
   set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif ()
 
+file(READ "${PROJECT_SOURCE_DIR}/VERSION" PEPARSE_VERSION)  # raw contents of the VERSION file at the project root
+string(STRIP "${PEPARSE_VERSION}" PEPARSE_VERSION)  # drop leading/trailing whitespace such as the final newline
+add_compile_definitions(PEPARSE_VERSION="${PEPARSE_VERSION}")  # string macro; dump-pe prints it for --version
+
 add_subdirectory(pe-parser-library)
 
 if (BUILD_COMMAND_LINE_TOOLS)
@@ -38,6 +42,7 @@ file(
         pe-parser-library/*.h
         pepy/*.cpp
         pepy/*.h
+        dump-pe/*.cpp
         examples/*.cpp
         examples/*.h
 )
diff --git a/dump-pe/main.cpp b/dump-pe/main.cpp
index eb24200..f945c85 100644
--- a/dump-pe/main.cpp
+++ b/dump-pe/main.cpp
@@ -29,6 +29,8 @@ THE SOFTWARE.
 
 #include <pe-parse/parse.h>
 
+#include "vendor/argh.h"
+
 using namespace peparse;
 
 int printExps(void *N,
@@ -295,14 +297,21 @@ int printSecs(void *N,
   std::cout << std::boolalpha << static_cast<bool>(p->peHeader.x) << "\n";
 
 int main(int argc, char *argv[]) {
-  if (argc != 2 || (argc == 2 && std::strcmp(argv[1], "--help") == 0)) {
+
+  argh::parser cmdl(argv);
+
+  if (cmdl[{"-h", "--help"}] || argc <= 1) {
     std::cout << "dump-pe utility from Trail of Bits\n";
     std::cout << "Repository: https://github.com/trailofbits/pe-parse\n\n";
     std::cout << "Usage:\n\tdump-pe /path/to/executable.exe\n";
-    return 1;
+    return 0;
+  } else if (cmdl[{"-v", "--version"}]) {
+    std::cout << "dump-pe (pe-parse) version " << PEPARSE_VERSION << "\n";
+    return 0;
   }
 
-  parsed_pe *p = ParsePEFromFile(argv[1]);
+  parsed_pe *p = ParsePEFromFile(cmdl[1].c_str());
+
   if (p == nullptr) {
     std::cout << "Error: " << GetPEErr() << " (" << GetPEErrString() << ")"
               << "\n";
diff --git a/dump-pe/vendor/argh.h b/dump-pe/vendor/argh.h
new file mode 100644
index 0000000..7e03a18
--- /dev/null
+++ b/dump-pe/vendor/argh.h
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2016, Adi Shavit
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of  nor the names of its contributors may be used to
+ *    endorse or promote products derived from this software without specific
+ *    prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <sstream>
+#include <limits>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <cassert>
+
+namespace argh
+{
+   // Terminology:
+   // A command line is composed of 2 types of args:
+   // 1. Positional args, i.e. free standing values
+   // 2. Options: args beginning with '-'. We identify two kinds:
+   //    2.1: Flags: boolean options =>  (exist ? true : false)
+   //    2.2: Parameters: a name followed by a non-option value
+
+#if !defined(__GNUC__) || (__GNUC__ >= 5)
+   using string_stream = std::istringstream;   // the standard stream is used directly on these compilers
+#else
+    // Until GCC 5, istringstream did not have a move constructor, so it could not be returned by value.
+    // stringstream_proxy wraps one and is copyable instead, as a workaround.
+   class stringstream_proxy
+   {
+   public:
+      stringstream_proxy() = default;
+
+      // Construct a stream whose contents are a copy of `value`.
+      stringstream_proxy(std::string const& value) :
+         stream_(value)
+      {}
+
+      // Copy constructor: rebuilds the stream from the other's text and then applies its state flags.
+      stringstream_proxy(const stringstream_proxy& other) :
+         stream_(other.stream_.str())
+      {
+         stream_.setstate(other.stream_.rdstate());
+      }
+
+      void setstate(std::ios_base::iostate state) { stream_.setstate(state); }   // forwards to the wrapped stream
+
+      // Extract a value of type T from the wrapped stream.
+      // If the conversion was not possible, the stream will enter the fail state,
+      // and operator bool will return false.
+      template<typename T>
+      stringstream_proxy& operator >> (T& thing)
+      {
+         stream_ >> thing;
+         return *this;
+      }
+
+
+      // Get the full text held by the wrapped stream.
+      std::string str() const { return stream_.str(); }
+
+      std::stringbuf* rdbuf() const { return stream_.rdbuf(); }   // buffer of the wrapped stream
+
+      // Check the state of the stream.
+      // False when the wrapped stream is in a failed state.
+      explicit operator bool() const { return !!stream_; }
+
+      ~stringstream_proxy() = default;
+   private:
+      std::istringstream stream_;   // the stream every member above forwards to
+   };
+   using string_stream = stringstream_proxy;
+#endif
+
+   class parser
+   {
+   public:
+      enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0,   // unregistered option before a value: option is a flag, value stays positional
+                  PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1,  // unregistered option before a value: value becomes the option's parameter
+                  NO_SPLIT_ON_EQUALSIGN = 1 << 2,          // do not split "name=value" options at the '='
+                  SINGLE_DASH_IS_MULTIFLAG = 1 << 3,       // unregistered single-dash option is split into one flag per character
+                };
+
+      parser() = default;
+
+      parser(std::initializer_list<char const* const> pre_reg_names)   // only registers parameter names; parses nothing
+      {  add_params(pre_reg_names); }
+
+      parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)   // argv must end with a null pointer
+      {  parse(argv, mode); }
+
+      parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
+      {  parse(argc, argv, mode); }
+
+      void add_param(std::string const& name);   // registers a parameter name (leading dashes are stripped)
+      void add_params(std::initializer_list<char const* const> init_list);
+
+      void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);   // counts entries up to the null terminator
+      void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
+
+      std::multiset<std::string>          const& flags()    const { return flags_;    }
+      std::map<std::string, std::string>  const& params()   const { return params_;   }
+      std::vector<std::string>            const& pos_args() const { return pos_args_; }
+
+      // begin() and end() for using range-for over positional args; size() is their count.
+      std::vector<std::string>::const_iterator begin() const { return pos_args_.cbegin(); }
+      std::vector<std::string>::const_iterator end()   const { return pos_args_.cend();   }
+      size_t size()                                    const { return pos_args_.size();   }
+
+      //////////////////////////////////////////////////////////////////////////
+      // Accessors
+
+      // flag (boolean) accessors: return true if the flag appeared, otherwise false.
+      bool operator[](std::string const& name) const;
+
+      // multiple flag (boolean) accessors: return true if at least one of the flags appeared, otherwise false.
+      bool operator[](std::initializer_list<char const* const> init_list) const;
+
+      // returns positional arg string by order. Like argv[] but without the options.
+      // An out-of-range index returns a reference to an empty string.
+      std::string const& operator[](size_t ind) const;
+      // returns a std::istream that can be used to convert a positional arg to a typed value.
+      string_stream operator()(size_t ind) const;
+
+      // same as above, but with a default value in case the arg is missing (index out of range).
+      template<typename T>
+      string_stream operator()(size_t ind, T&& def_val) const;
+
+      // parameter accessors, give a name get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      string_stream operator()(std::string const& name) const;
+
+      // accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      // returns the first value in the list to be found.
+      string_stream operator()(std::initializer_list<char const* const> init_list) const;
+
+      // same as above, but with a default value in case the param was missing.
+      // Non-string def_val types must have an operator<<() (output stream operator)
+      // If T only has an input stream operator, pass the string version of the type as in "3" instead of 3.
+      template<typename T>
+      string_stream operator()(std::string const& name, T&& def_val) const;
+
+      // same as above but for a list of names. returns the first value to be found.
+      template<typename T>
+      string_stream operator()(std::initializer_list<char const* const> init_list, T&& def_val) const;
+
+   private:
+      string_stream bad_stream() const;   // stream already in the fail state, used for "not found"
+      std::string trim_leading_dashes(std::string const& name) const;
+      bool is_number(std::string const& arg) const;
+      bool is_option(std::string const& arg) const;   // starts with '-' and is not a number
+      bool got_flag(std::string const& name) const;
+      bool is_param(std::string const& name) const;   // true when name was registered via add_param(s)
+
+   private:
+      std::vector<std::string> args_;                // copy of the argv entries handed to parse()
+      std::map<std::string, std::string> params_;    // option name (no leading dashes) -> value
+      std::vector<std::string> pos_args_;            // non-option arguments in command-line order
+      std::multiset<std::string> flags_;             // names of options recorded without a value
+      std::set<std::string> registeredParams_;       // names registered through add_param / add_params
+      std::string empty_;                            // returned by operator[](size_t) for out-of-range indices
+   };
+
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(const char * const argv[], int mode)
+   {
+      // argv carries no length here, so count entries up to the terminating null pointer.
+      int count = 0;
+      while (argv[count] != nullptr) ++count;
+      parse(count, argv, mode);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/)
+   {
+      // Splits argv into positional args, flags and params according to `mode`.
+      // Results of any previous parse() are discarded first so repeated calls do not mix.
+      flags_.clear(); params_.clear(); pos_args_.clear();
+      args_.resize(static_cast<decltype(args_)::size_type>(argc));
+      std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg;  });
+
+      // parse line
+      for (auto i = 0u; i < args_.size(); ++i)
+      {
+         if (!is_option(args_[i]))
+         {
+            pos_args_.emplace_back(args_[i]);
+            continue;
+         }
+
+         auto name = trim_leading_dashes(args_[i]);
+
+         if (!(mode & NO_SPLIT_ON_EQUALSIGN))
+         {
+            auto equalPos = name.find('=');
+            if (equalPos != std::string::npos)
+            {
+               params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) });
+               continue;
+            }
+         }
+
+         // if the option is unregistered and should be a multi-flag
+         if (1 == (args_[i].size() - name.size()) &&            // single dash
+            (mode & argh::parser::SINGLE_DASH_IS_MULTIFLAG) && // multi-flag mode
+            !is_param(name))                                    // unregistered
+         {
+            std::string keep_param;
+
+            if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param
+            {
+               keep_param += name.back();
+               name.resize(name.size() - 1);
+            }
+
+            for (auto const& c : name)
+            {
+               flags_.emplace(std::string{ c });
+            }
+
+            if (!keep_param.empty())
+            {
+               name = keep_param;
+            }
+            else
+            {
+               continue; // do not consider other options for this arg
+            }
+         }
+
+         // any potential option will get as its value the next arg, unless that arg is an option too
+         // in that case it will be determined a flag.
+         if (i == args_.size() - 1 || is_option(args_[i + 1]))
+         {
+            flags_.emplace(name);
+            continue;
+         }
+
+         // if 'name' is a pre-registered option, the next arg is its value (not a free parameter), so it is skipped.
+         // otherwise we have 2 modes:
+         // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag.
+         //                               The following value (the next arg) will be a free parameter.
+         // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg
+         //                                will be the value of that option.
+
+         assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION) || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION));
+
+         bool preferParam = (mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION) != 0;
+
+         if (is_param(name) || preferParam)
+         {
+            params_.insert({ name, args_[i + 1] });
+            ++i; // skip next value, it is not a free parameter
+            continue;
+         }
+         else
+         {
+            flags_.emplace(name);
+         }
+      }
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::bad_stream() const
+   {
+      string_stream failed;   // default-constructed, then forced into the fail state below
+      failed.setstate(std::ios_base::failbit);
+      return failed;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::is_number(std::string const& arg) const
+   {
+      // simple rather than fast: let a stream try to extract a double (a leading '-' is accepted as a sign)
+      double parsed = 0.0;
+      std::istringstream input(arg);
+      input >> parsed;
+      return !input.fail() && !input.bad();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::is_option(std::string const& arg) const
+   {
+      // An empty argument (e.g. "" passed on the command line) is a plain value, never an option;
+      // numbers such as "-5" are values too, even though they begin with '-'.
+      if (arg.empty() || is_number(arg)) return false;
+      return '-' == arg[0];
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline std::string parser::trim_leading_dashes(std::string const& name) const
+   {
+      auto const first = name.find_first_not_of('-');   // npos when name is empty or consists only of dashes
+      return first == std::string::npos ? name : name.substr(first);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool argh::parser::got_flag(std::string const& name) const
+   {
+      return flags_.count(trim_leading_dashes(name)) != 0;   // lookup key is the name without leading dashes
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool argh::parser::is_param(std::string const& name) const
+   {
+      return registeredParams_.find(name) != registeredParams_.end();   // name is looked up as given, without trimming
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::operator[](std::string const& name) const
+   {
+      return flags_.find(trim_leading_dashes(name)) != flags_.end();   // the same lookup got_flag() performs
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::operator[](std::initializer_list<char const* const> init_list) const
+   {
+      return init_list.end() != std::find_if(init_list.begin(), init_list.end(), [this](char const* const alias) { return got_flag(alias); });
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline std::string const& parser::operator[](size_t ind) const
+   {
+      // Out-of-range indices yield a reference to the member empty string rather than failing.
+      bool const in_range = ind < pos_args_.size();
+      return in_range ? pos_args_[ind] : empty_;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(std::string const& name) const
+   {
+      // Look the parameter up by its dash-trimmed name; a miss is reported via a failed stream.
+      auto const found = params_.find(trim_leading_dashes(name));
+      if (found == params_.end()) return bad_stream();
+      return string_stream(found->second);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(std::initializer_list<char const* const> init_list) const
+   {
+      // Try each alias in the order given; the first one found in params_ decides the result.
+      for (char const* const alias : init_list)
+      {
+         auto const found = params_.find(trim_leading_dashes(alias));
+         if (found != params_.end()) return string_stream(found->second);
+      }
+      return bad_stream();   // none of the names is present as a parameter
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   template<typename T>
+   string_stream parser::operator()(std::string const& name, T&& def_val) const
+   {
+      auto const found = params_.find(trim_leading_dashes(name));
+      if (found != params_.end()) return string_stream(found->second);
+      // Parameter not present: hand back the default instead, written out with
+      // max_digits10 significant digits so floating-point defaults keep their precision.
+      std::ostringstream fallback;
+      fallback.precision(std::numeric_limits<long double>::max_digits10);
+      fallback << def_val;
+      return string_stream(fallback.str());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // same as above but for a list of names. returns the first value to be found.
+   template<typename T>
+   string_stream parser::operator()(std::initializer_list<char const* const> init_list, T&& def_val) const
+   {
+      for (char const* const alias : init_list)
+      {
+         auto const found = params_.find(trim_leading_dashes(alias));
+         if (found != params_.end()) return string_stream(found->second);
+      }
+      // No alias matched: fall back to the textual form of def_val.
+      std::ostringstream fallback;
+      fallback.precision(std::numeric_limits<long double>::max_digits10);
+      fallback << def_val;
+      return string_stream(fallback.str());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(size_t ind) const
+   {
+      // A missing positional argument is reported through a stream that is already in the fail state.
+      if (ind < pos_args_.size())
+         return string_stream(pos_args_[ind]);
+      return bad_stream();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   template<typename T>
+   string_stream parser::operator()(size_t ind, T&& def_val) const
+   {
+      if (ind < pos_args_.size())
+         return string_stream(pos_args_[ind]);
+
+      // Index out of range: serialise the default value so the caller can still extract from it.
+      // Precision is raised to max_digits10 before the default is written.
+      std::ostringstream fallback;
+      fallback.precision(std::numeric_limits<long double>::max_digits10);
+      fallback << def_val;
+      return string_stream(fallback.str());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::add_param(std::string const& name)
+   {
+      registeredParams_.emplace(trim_leading_dashes(name));   // stored without leading dashes
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::add_params(std::initializer_list<char const* const> init_list)
+   {
+      // Register every name through add_param so the dash-trimming happens in one place.
+      for (char const* const entry : init_list) add_param(entry);
+   }
+}
diff --git a/pe-parser-library/CMakeLists.txt b/pe-parser-library/CMakeLists.txt
index 468891b..1962f79 100644
--- a/pe-parser-library/CMakeLists.txt
+++ b/pe-parser-library/CMakeLists.txt
@@ -3,10 +3,6 @@ project(pe-parse)
 
 message(STATUS "VERSION file: ${PROJECT_SOURCE_DIR}/../VERSION")
 
-file(READ "${PROJECT_SOURCE_DIR}/../VERSION" PEPARSE_VERSION)
-string(STRIP "${PEPARSE_VERSION}" PEPARSE_VERSION)
-add_compile_definitions(PEPARSE_VERSION="${PEPARSE_VERSION}")
-
 # List all files explicitly; this will make IDEs happy (i.e. QtCreator, CLion, ...)
 list(APPEND PEPARSERLIB_SOURCEFILES
   include/pe-parse/parse.h