From 9f74016e0deb3e5052bdc8f9a957c643fcc019cd Mon Sep 17 00:00:00 2001
From: Jozef Kolek <jkolek@gmail.com>
Date: Wed, 12 Apr 2017 00:24:03 +0200
Subject: [PATCH] Perform endian swap as needed

---
 parser-library/buffer.cpp   | 47 ++++++++++++++++++++++++++++++-------
 parser-library/nt-headers.h | 43 +++++++++++++++++++++++++++++++++
 parser-library/parse.cpp    | 30 +++++++++++++++++++++++
 parser-library/parse.h      |  4 ++++
 4 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/parser-library/buffer.cpp b/parser-library/buffer.cpp
index 309d6ab..90fb98f 100644
--- a/parser-library/buffer.cpp
+++ b/parser-library/buffer.cpp
@@ -36,6 +36,28 @@ THE SOFTWARE.
 #include <unistd.h>
 #endif
 
+namespace {
+
+// Exchanges the two bytes of a 16-bit value.
+inline uint16_t byteSwapUint16(uint16_t val) {
+  return static_cast<uint16_t>(((val << 8) & 0xFF00U) |
+                               ((val >> 8) & 0x00FFU));
+}
+
+// Reverses a 32-bit value: swap bytes inside each half, then exchange halves.
+inline uint32_t byteSwapUint32(uint32_t val) {
+  const uint32_t swappedLow = byteSwapUint16(static_cast<uint16_t>(val));
+  return (swappedLow << 16) | byteSwapUint16(static_cast<uint16_t>(val >> 16));
+}
+
+// Reverses a 64-bit value: swap bytes inside each half, then exchange halves.
+inline uint64_t byteSwapUint64(uint64_t val) {
+  const uint64_t swappedLow = byteSwapUint32(static_cast<uint32_t>(val));
+  return (swappedLow << 32) | byteSwapUint32(static_cast<uint32_t>(val >> 32));
+}
+
+} // anonymous namespace
+
 using namespace std;
 
 namespace peparse {
@@ -67,7 +89,6 @@ bool readByte(bounded_buffer *b, ::uint32_t offset, ::uint8_t &out) {
   return true;
 }
 
-// TODO: perform endian swap as needed
 bool readWord(bounded_buffer *b, ::uint32_t offset, ::uint16_t &out) {
   if (b == nullptr) {
     return false;
@@ -78,12 +99,15 @@ bool readWord(bounded_buffer *b, ::uint32_t offset, ::uint16_t &out) {
   }
 
   ::uint16_t *tmp = reinterpret_cast<uint16_t *>(b->buf + offset);
-  out = *tmp;
+  if (b->swapBytes) {
+    out = byteSwapUint16(*tmp);
+  } else {
+    out = *tmp;
+  }
 
   return true;
 }
 
-// TODO: perform endian swap as needed
 bool readDword(bounded_buffer *b, ::uint32_t offset, ::uint32_t &out) {
   if (b == nullptr) {
     return false;
@@ -94,12 +118,15 @@ bool readDword(bounded_buffer *b, ::uint32_t offset, ::uint32_t &out) {
   }
 
   ::uint32_t *tmp = reinterpret_cast<uint32_t *>(b->buf + offset);
-  out = *tmp;
+  if (b->swapBytes) {
+    out = byteSwapUint32(*tmp);
+  } else {
+    out = *tmp;
+  }
 
   return true;
 }
 
-// TODO: perform endian swap as needed
 bool readQword(bounded_buffer *b, ::uint32_t offset, ::uint64_t &out) {
   if (b == nullptr) {
     return false;
@@ -110,7 +137,11 @@ bool readQword(bounded_buffer *b, ::uint32_t offset, ::uint64_t &out) {
   }
 
   ::uint64_t *tmp = reinterpret_cast<uint64_t *>(b->buf + offset);
-  out = *tmp;
+  if (b->swapBytes) {
+    out = byteSwapUint64(*tmp);
+  } else {
+    out = *tmp;
+  }
 
   return true;
 }
@@ -187,7 +218,6 @@ bounded_buffer *readFileToFileBuffer(const char *filePath) {
 
   p->buf = (::uint8_t *) ptr;
   p->bufLen = fileSize;
-  p->copy = false;
 #else
   p->detail->fd = fd;
 
@@ -213,8 +243,9 @@ bounded_buffer *readFileToFileBuffer(const char *filePath) {
 
   p->buf = reinterpret_cast<uint8_t *>(maddr);
   p->bufLen = s.st_size;
-  p->copy = false;
 #endif
+  p->copy = false;
+  p->swapBytes = false;
 
   return p;
 }
diff --git a/parser-library/nt-headers.h b/parser-library/nt-headers.h
index 993ff63..7adb3b6 100644
--- a/parser-library/nt-headers.h
+++ b/parser-library/nt-headers.h
@@ -55,6 +55,49 @@ const std::uint16_t DIR_IAT = 12;
 const std::uint16_t DIR_DELAY_IMPORT = 13;
 const std::uint16_t DIR_COM_DESCRIPTOR = 14;
 
+// Machine Types (compared against FileHeader.Machine, e.g. via TEST_MACHINE_CHARACTERISTICS)
+const std::uint16_t IMAGE_FILE_MACHINE_UNKNOWN = 0x0;
+const std::uint16_t IMAGE_FILE_MACHINE_AM33 = 0x1d3;      // Matsushita AM33
+const std::uint16_t IMAGE_FILE_MACHINE_AMD64 = 0x8664;    // x64
+const std::uint16_t IMAGE_FILE_MACHINE_ARM = 0x1c0;       // ARM little endian
+const std::uint16_t IMAGE_FILE_MACHINE_ARM64 = 0xaa64;    // ARM64 little endian
+const std::uint16_t IMAGE_FILE_MACHINE_ARMNT = 0x1c4;     // ARM Thumb-2 little endian
+const std::uint16_t IMAGE_FILE_MACHINE_EBC = 0xebc;       // EFI byte code
+const std::uint16_t IMAGE_FILE_MACHINE_I386 = 0x14c;      // Intel 386 or later processors and compatible processors
+const std::uint16_t IMAGE_FILE_MACHINE_IA64 = 0x200;      // Intel Itanium processor family
+const std::uint16_t IMAGE_FILE_MACHINE_M32R = 0x9041;     // Mitsubishi M32R little endian
+const std::uint16_t IMAGE_FILE_MACHINE_MIPS16 = 0x266;    // MIPS16
+const std::uint16_t IMAGE_FILE_MACHINE_MIPSFPU = 0x366;   // MIPS with FPU
+const std::uint16_t IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466; // MIPS16 with FPU
+const std::uint16_t IMAGE_FILE_MACHINE_POWERPC = 0x1f0;   // Power PC little endian
+const std::uint16_t IMAGE_FILE_MACHINE_POWERPCFP = 0x1f1; // Power PC with floating point support
+const std::uint16_t IMAGE_FILE_MACHINE_R4000 = 0x166;     // MIPS little endian
+const std::uint16_t IMAGE_FILE_MACHINE_RISCV32 = 0x5032;  // RISC-V 32-bit address space
+const std::uint16_t IMAGE_FILE_MACHINE_RISCV64 = 0x5064;  // RISC-V 64-bit address space
+const std::uint16_t IMAGE_FILE_MACHINE_RISCV128 = 0x5128; // RISC-V 128-bit address space
+const std::uint16_t IMAGE_FILE_MACHINE_SH3 = 0x1a2;       // Hitachi SH3
+const std::uint16_t IMAGE_FILE_MACHINE_SH3DSP = 0x1a3;    // Hitachi SH3 DSP
+const std::uint16_t IMAGE_FILE_MACHINE_SH4 = 0x1a6;       // Hitachi SH4
+const std::uint16_t IMAGE_FILE_MACHINE_SH5 = 0x1a8;       // Hitachi SH5
+const std::uint16_t IMAGE_FILE_MACHINE_THUMB = 0x1c2;     // Thumb
+const std::uint16_t IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169; // MIPS little-endian WCE v2
+
+const std::uint16_t IMAGE_FILE_RELOCS_STRIPPED = 0x0001; // start of single-bit flag masks (presumably FileHeader.Characteristics bits; confirm against the PE spec)
+const std::uint16_t IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002;
+const std::uint16_t IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004;
+const std::uint16_t IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008;
+const std::uint16_t IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010;
+const std::uint16_t IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020; // note: no constant is defined here for bit 0x0040
+const std::uint16_t IMAGE_FILE_BYTES_REVERSED_LO = 0x0080; // not consulted by the swap check in readNtHeader, which tests only the _HI flag
+const std::uint16_t IMAGE_FILE_32BIT_MACHINE = 0x0100;
+const std::uint16_t IMAGE_FILE_DEBUG_STRIPPED = 0x0200;
+const std::uint16_t IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400;
+const std::uint16_t IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800;
+const std::uint16_t IMAGE_FILE_SYSTEM = 0x1000;
+const std::uint16_t IMAGE_FILE_DLL = 0x2000;
+const std::uint16_t IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000;
+const std::uint16_t IMAGE_FILE_BYTES_REVERSED_HI = 0x8000; // tested by readNtHeader (parse.cpp) to decide whether to set bounded_buffer::swapBytes
+
 const std::uint32_t IMAGE_SCN_TYPE_NO_PAD = 0x00000008;
 const std::uint32_t IMAGE_SCN_CNT_CODE = 0x00000020;
 const std::uint32_t IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040;
diff --git a/parser-library/parse.cpp b/parser-library/parse.cpp
index d90714f..2dc7c1e 100644
--- a/parser-library/parse.cpp
+++ b/parser-library/parse.cpp
@@ -585,6 +585,36 @@ bool readNtHeader(bounded_buffer *b, nt_header_32 &header) {
     return false;
   }
 
+  if (TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_AMD64,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_ARM,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_ARM64,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_ARMNT,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_I386,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_M32R,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_POWERPC,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_R4000,
+                                   IMAGE_FILE_BYTES_REVERSED_HI) ||
+      TEST_MACHINE_CHARACTERISTICS(header,
+                                   IMAGE_FILE_MACHINE_WCEMIPSV2,
+                                   IMAGE_FILE_BYTES_REVERSED_HI)) {
+    b->swapBytes = true;
+  }
+
   /*
    * The buffer is split using the OptionalHeader offset, even if it turns
    * out to be a PE32+. The start of the buffer is at the same spot in the
diff --git a/parser-library/parse.h b/parser-library/parse.h
index 4a72cd7..842fcf0 100644
--- a/parser-library/parse.h
+++ b/parser-library/parse.h
@@ -76,6 +76,9 @@ THE SOFTWARE.
     return NULL;                                                           \
   }
 
+#define TEST_MACHINE_CHARACTERISTICS(h, m, ch) /* true when h's Machine == m and any bit of ch is set */ \
+  (((h).FileHeader.Machine == (m)) && ((h).FileHeader.Characteristics & (ch)))
+
 namespace peparse {
 
 typedef std::uint32_t RVA;
@@ -87,6 +90,7 @@ typedef struct _bounded_buffer {
   std::uint8_t *buf;
   std::uint32_t bufLen;
   bool copy;
+  bool swapBytes; // when true, readWord/readDword/readQword byte-swap each value they read from buf
   buffer_detail *detail;
 } bounded_buffer;