From 9036a2405dc44726f40cb77cab1bcbf371ab7a70 Mon Sep 17 00:00:00 2001
From: Romain Thomas
Date: Fri, 22 Sep 2017 14:42:39 +0200
Subject: [PATCH] Bug fix in dynamic symbols counting based on GnuHash

API Changes (ELF):

  * ELF::GnuHash::{check_bloom_filter, check_bucket, check}

GnuHash::check_bloom_filter and GnuHash::check_bucket tolerate an unset
word size (c_ == 0) and empty tables instead of dividing by zero or
reading out of bounds.
---
 api/python/ELF/objects/pyGnuHash.cpp | 26 ++++++++++++
 examples/python/elf_reader.py        |  2 +-
 include/LIEF/ELF/GnuHash.hpp         | 27 ++++++++++++
 include/LIEF/ELF/utils.hpp           |  1 +
 src/ELF/GnuHash.cpp                  | 68 +++++++++++++++++++++++++++++-
 src/ELF/Parser.tcc                   | 33 ++++++++++-----
 tests/elf/elf_test.py                |  7 ++++
 7 files changed, 151 insertions(+), 13 deletions(-)

diff --git a/api/python/ELF/objects/pyGnuHash.cpp b/api/python/ELF/objects/pyGnuHash.cpp
index 4940663..4b7a2fc 100644
--- a/api/python/ELF/objects/pyGnuHash.cpp
+++ b/api/python/ELF/objects/pyGnuHash.cpp
@@ -65,6 +65,32 @@ void init_ELF_GnuHash_class(py::module& m) {
         "Hash values",
         py::return_value_policy::reference_internal)

+    .def("check_bloom_filter",
+        &GnuHash::check_bloom_filter,
+        "Check if the given hash pass the bloom filter",
+        "hash"_a)
+
+    .def("check_bucket",
+        &GnuHash::check_bucket,
+        "Check if the given hash pass the bucket filter",
+        "hash"_a)
+
+    .def("check",
+        static_cast<bool(GnuHash::*)(const std::string&) const>(&GnuHash::check),
+        "Check if the symbol *probably* exists. If "
+        "the returned value is ``false`` you can assume at ``100%`` that "
+        "the symbol with the given name doesn't exists. If ``true`` you can't "
+        "do any assumption ",
+        "symbol_name"_a)
+
+    .def("check",
+        static_cast<bool(GnuHash::*)(uint32_t) const>(&GnuHash::check),
+        "Check if the symbol associated with the given *probably* exists. If "
+        "the returned value is ``false`` you can assume at ``100%`` that "
+        "the symbol doesn't exists. If ``true`` you can't "
+        "do any assumption",
+        "hash_value"_a)
+
     .def("__eq__", &GnuHash::operator==)
     .def("__ne__", &GnuHash::operator!=)
     .def("__hash__",
diff --git a/examples/python/elf_reader.py b/examples/python/elf_reader.py
index 87a0dbc..9a67a23 100755
--- a/examples/python/elf_reader.py
+++ b/examples/python/elf_reader.py
@@ -13,7 +13,7 @@ import os
 import traceback

 from lief import Logger
-Logger.set_level(lief.LOGGING_LEVEL.WARNING)
+Logger.set_level(lief.LOGGING_LEVEL.INFO)

 from optparse import OptionParser
 terminal_rows, terminal_columns = 100, 100
diff --git a/include/LIEF/ELF/GnuHash.hpp b/include/LIEF/ELF/GnuHash.hpp
index 29452f8..71e6553 100644
--- a/include/LIEF/ELF/GnuHash.hpp
+++ b/include/LIEF/ELF/GnuHash.hpp
@@ -37,6 +37,13 @@ class DLL_PUBLIC GnuHash : public Visitable {

   public:
   GnuHash(void);
+  GnuHash(uint32_t symbol_idx,
+      uint32_t shift2,
+      const std::vector<uint64_t>& bloom_filters,
+      const std::vector<uint32_t>& buckets,
+      const std::vector<uint32_t>& hash_values = {});
+
+
   GnuHash& operator=(const GnuHash& copy);
   GnuHash(const GnuHash& copy);
   virtual ~GnuHash(void);
@@ -65,6 +72,24 @@ class DLL_PUBLIC GnuHash : public Visitable {
   //! @brief Hash values
   const std::vector<uint32_t>& hash_values(void) const;

+  //! @brief Check if the given hash passes the bloom filter
+  bool check_bloom_filter(uint32_t hash) const;
+
+  //! @brief Check if the given hash passes the bucket filter
+  bool check_bucket(uint32_t hash) const;
+
+  //! @brief Check if the symbol *probably* exists. If
+  //! the returned value is ``false`` you can assume at ``100%`` that
+  //! the symbol with the given name doesn't exist. If ``true`` you can't
+  //! make any assumption
+  bool check(const std::string& symbol_name) const;
+
+  //! @brief Check if the symbol associated with the given hash *probably* exists. If
+  //! the returned value is ``false`` you can assume at ``100%`` that
+  //! the symbol doesn't exist. If ``true`` you can't
+  //! make any assumption
+  bool check(uint32_t hash) const;
+
   bool operator==(const GnuHash& rhs) const;
   bool operator!=(const GnuHash& rhs) const;

@@ -79,6 +104,8 @@ class DLL_PUBLIC GnuHash : public Visitable {
   std::vector<uint64_t> bloom_filters_;
   std::vector<uint32_t> buckets_;
   std::vector<uint32_t> hash_values_;
+
+  size_t c_; //!< Number of bits in one bloom filter word (0 until the parser sets it)
 };


diff --git a/include/LIEF/ELF/utils.hpp b/include/LIEF/ELF/utils.hpp
index e66314f..f2205fa 100644
--- a/include/LIEF/ELF/utils.hpp
+++ b/include/LIEF/ELF/utils.hpp
@@ -35,6 +35,7 @@ DLL_PUBLIC unsigned long hash32(const char* name);
 DLL_PUBLIC unsigned long hash64(const char* name);
 DLL_PUBLIC uint32_t dl_new_hash(const char* name);

+
 }
 }

diff --git a/src/ELF/GnuHash.cpp b/src/ELF/GnuHash.cpp
index 59ea9f1..f1dc485 100644
--- a/src/ELF/GnuHash.cpp
+++ b/src/ELF/GnuHash.cpp
@@ -19,6 +19,7 @@

 #include "LIEF/visitors/Hash.hpp"

+#include "LIEF/ELF/utils.hpp"
 #include "LIEF/ELF/GnuHash.hpp"

 namespace LIEF {
@@ -32,7 +33,22 @@ GnuHash::GnuHash(void) :
   shift2_{0},
   bloom_filters_{0},
   buckets_{0},
-  hash_values_{0}
+  hash_values_{0},
+  c_{0}
+{}
+
+
+GnuHash::GnuHash(uint32_t symbol_idx,
+    uint32_t shift2,
+    const std::vector<uint64_t>& bloom_filters,
+    const std::vector<uint32_t>& buckets,
+    const std::vector<uint32_t>& hash_values) :
+  symbol_index_{symbol_idx},
+  shift2_{shift2},
+  bloom_filters_{bloom_filters},
+  buckets_{buckets},
+  hash_values_{hash_values},
+  c_{0}
 {}


@@ -64,6 +80,56 @@ const std::vector<uint32_t>& GnuHash::hash_values(void) const {
   return this->hash_values_;
 }

+bool GnuHash::check_bloom_filter(uint32_t hash) const {
+  const size_t C = this->c_; // Number of bits in one bloom filter word
+  const size_t maskwords = this->maskwords();
+  const auto& filters = this->bloom_filters();
+
+  // c_ is 0 until the parser sets it and the filter may be empty. In both
+  // cases the filter can't be evaluated (division by zero): nothing can be
+  // ruled out.
+  if (C == 0 or maskwords == 0) {
+    return true;
+  }
+
+  const uint32_t h1 = hash;
+  // Shifting a 32-bit value by 32 bits or more is undefined behavior
+  const uint32_t h2 = this->shift2() < 32 ? hash >> this->shift2() : 0;
+
+  const size_t n1 = (h1 / C) % maskwords;
+  if (n1 >= filters.size()) { // Inconsistent table: don't read out of bounds
+    return true;
+  }
+
+  const uint32_t b1 = h1 % C;
+  const uint32_t b2 = h2 % C;
+  const uint64_t filter = filters[n1];
+  return (filter >> b1) & (filter >> b2) & 1;
+}
+
+
+bool GnuHash::check_bucket(uint32_t hash) const {
+  // Without buckets no symbol is registered (and the modulo would divide by zero)
+  const size_t nb_buckets = this->nb_buckets();
+  if (nb_buckets == 0) {
+    return false;
+  }
+
+  const auto& buckets = this->buckets();
+  const size_t idx = hash % nb_buckets;
+  return idx < buckets.size() and buckets[idx] > 0;
+}
+
+bool GnuHash::check(const std::string& symbol_name) const {
+  const uint32_t hash = dl_new_hash(symbol_name.c_str());
+  return this->check(hash);
+}
+
+
+bool GnuHash::check(uint32_t hash) const {
+  // Both the bloom filter and the bucket table must accept the hash
+  return this->check_bloom_filter(hash) and this->check_bucket(hash);
+}

 bool GnuHash::operator==(const GnuHash& rhs) const {
   size_t hash_lhs = Hash::hash(*this);
diff --git a/src/ELF/Parser.tcc b/src/ELF/Parser.tcc
index ea49534..c5848c8 100644
--- a/src/ELF/Parser.tcc
+++ b/src/ELF/Parser.tcc
@@ -649,7 +649,7 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
   const uint32_t nbuckets = header[0];
   const uint32_t symndx = header[1];
   const uint32_t maskwords = header[2];
-  //const uint32_t shift2 = header[3];
+  const uint32_t shift2 = header[3];

   if (maskwords & (maskwords - 1)) {
     LOG(WARNING) << "maskwords is not a power of 2";
@@ -692,21 +692,30 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
     return 0;
   }

-  nb_symbols = std::max(nb_symbols, symndx);
+  nb_symbols = symndx;

-  const uint32_t* hash_values = reinterpret_cast<const uint32_t*>(
-      this->stream_->read(current_offset, nb_symbols * sizeof(uint32_t)));
+  GnuHash gnuhash{symndx, shift2, bloom_filters, buckets};
+  gnuhash.c_ = sizeof(uint__) * 8;

-  // "It is set to 1 when a symbol is the last symbol in a given hash chain"
-  while (((*hash_values) & 1) == 0) {
-    ++nb_symbols;
-    ++hash_values;
+  // Register the number of symbols stored in the buckets
+  std::vector<uint32_t> nbsym_buckets(nbuckets, 0);
+
+  for (size_t i = 0; i < nbuckets; ++i) {
+    uint32_t hash_value = 0;
+    size_t nsyms = 0;
+    do {
+      hash_value = this->stream_->read_integer<uint32_t>(current_offset);
+      current_offset += sizeof(uint32_t);
+
+      nsyms++;
+    } while ((hash_value & 1) == 0); // "It is set to 1 when a symbol is the last symbol in a given hash bucket"
+
+    nbsym_buckets[i] = buckets[i] + nsyms;
   }


-  return ++nb_symbols;
-
-
+  nb_symbols = std::max(nb_symbols, *std::max_element(std::begin(nbsym_buckets), std::end(nbsym_buckets)));
+  return nb_symbols;
 }

 template<typename ELF_T>
@@ -897,6 +906,7 @@ void Parser::parse_dynamic_symbols(uint64_t offset) {
   VLOG(VDEBUG) << "[+] Parsing dynamics symbols";

   uint32_t nb_symbols = this->get_numberof_dynamic_symbols<ELF_T>(this->count_mtd_);
+  VLOG(VDEBUG) << "Number of symbols counted: " << nb_symbols;

   const Elf_Off dynamic_symbols_offset = offset;
   const Elf_Off string_offset = this->get_dynamic_string_table();
@@ -1494,6 +1504,7 @@ void Parser::parse_symbol_gnu_hash(uint64_t offset) {

   VLOG(VDEBUG) << "[+] Parser symbol GNU hash";
   GnuHash gnuhash;
+  gnuhash.c_ = sizeof(uint__) * 8;

   uint64_t current_offset = offset;

diff --git a/tests/elf/elf_test.py b/tests/elf/elf_test.py
index 9c292a0..c0f49ab 100644
--- a/tests/elf/elf_test.py
+++ b/tests/elf/elf_test.py
@@ -76,6 +76,13 @@ class TestELF(TestCase):
             0x12F7C433, 0xEB01FAB6, 0xECD54543, 0xAD3C9892,
             0x72632CCF, 0x12F7A2B3, 0x7C92E3BB, 0x7C96F087]
         self.assertEqual(hash_values, hash_values_test)
+        #for s in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]:
+        #    print(gnu_hash.check(s.name), s.name)
+        self.assertTrue(all(gnu_hash.check(x.name) for x in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]))
+
+        self.assertFalse(gnu_hash.check("foofdsfdsfds"))
+        self.assertFalse(gnu_hash.check("fazertrvkdfsrezklqpfjeopqdi"))
+
     def test_permutation(self):
         samples = [
                 "ELF/ELF64_x86-64_binary_ls.bin",