Bug fix in dynamic symbols counting based on GnuHash

API Changes (ELF):

  * ELF::GnuHash::{check_bloom_filter, check_bucket, check}
This commit is contained in:
Romain Thomas 2017-09-22 14:42:39 +02:00
parent 2f53542eb0
commit 9036a2405d
7 changed files with 135 additions and 13 deletions

View File

@ -65,6 +65,32 @@ void init_ELF_GnuHash_class(py::module& m) {
"Hash values",
py::return_value_policy::reference_internal)
.def("check_bloom_filter",
&GnuHash::check_bloom_filter,
"Check if the given hash pass the bloom filter",
"hash"_a)
.def("check_bucket",
&GnuHash::check_bucket,
"Check if the given hash pass the bucket filter",
"hash"_a)
.def("check",
static_cast<bool(GnuHash::*)(const std::string&) const>(&GnuHash::check),
"Check if the symbol *probably* exists. If "
"the returned value is ``false`` you can assume at ``100%`` that "
"the symbol with the given name doesn't exists. If ``true`` you can't "
"do any assumption ",
"symbol_name"_a)
.def("check",
static_cast<bool(GnuHash::*)(uint32_t) const>(&GnuHash::check),
"Check if the symbol associated with the given *probably* exists. If "
"the returned value is ``false`` you can assume at ``100%`` that "
"the symbol doesn't exists. If ``true`` you can't "
"do any assumption",
"hash_value"_a)
.def("__eq__", &GnuHash::operator==)
.def("__ne__", &GnuHash::operator!=)
.def("__hash__",

View File

@ -13,7 +13,7 @@ import os
import traceback
from lief import Logger
Logger.set_level(lief.LOGGING_LEVEL.WARNING)
Logger.set_level(lief.LOGGING_LEVEL.INFO)
from optparse import OptionParser
terminal_rows, terminal_columns = 100, 100

View File

@ -37,6 +37,13 @@ class DLL_PUBLIC GnuHash : public Visitable {
public:
GnuHash(void);
GnuHash(uint32_t symbol_idx,
uint32_t shift2,
const std::vector<uint64_t>& bloom_filters,
const std::vector<uint32_t>& buckets,
const std::vector<uint32_t>& hash_values = {});
GnuHash& operator=(const GnuHash& copy);
GnuHash(const GnuHash& copy);
virtual ~GnuHash(void);
@ -65,6 +72,24 @@ class DLL_PUBLIC GnuHash : public Visitable {
//! @brief Hash values
const std::vector<uint32_t>& hash_values(void) const;
//! @brief Check if the given hash passes the bloom filter
bool check_bloom_filter(uint32_t hash) const;
//! @brief Check if the given hash passes the bucket filter
bool check_bucket(uint32_t hash) const;
//! @brief Check if the symbol *probably* exists. If
//! the returned value is ``false`` you can assume at ``100%`` that
//! the symbol with the given name doesn't exist. If ``true`` you can't
//! make any assumption
bool check(const std::string& symbol_name) const;
//! @brief Check if the symbol associated with the given hash *probably* exists. If
//! the returned value is ``false`` you can assume at ``100%`` that
//! the symbol doesn't exist. If ``true`` you can't
//! make any assumption
bool check(uint32_t hash) const;
bool operator==(const GnuHash& rhs) const;
bool operator!=(const GnuHash& rhs) const;
@ -79,6 +104,8 @@ class DLL_PUBLIC GnuHash : public Visitable {
std::vector<uint64_t> bloom_filters_;
std::vector<uint32_t> buckets_;
std::vector<uint32_t> hash_values_;
size_t c_;
};

View File

@ -35,6 +35,7 @@ DLL_PUBLIC unsigned long hash32(const char* name);
DLL_PUBLIC unsigned long hash64(const char* name);
DLL_PUBLIC uint32_t dl_new_hash(const char* name);
}
}

View File

@ -19,6 +19,7 @@
#include "LIEF/visitors/Hash.hpp"
#include "LIEF/ELF/utils.hpp"
#include "LIEF/ELF/GnuHash.hpp"
namespace LIEF {
@ -32,7 +33,22 @@ GnuHash::GnuHash(void) :
shift2_{0},
bloom_filters_{0},
buckets_{0},
hash_values_{0}
hash_values_{0},
c_{0}
{}
// Build a GNU hash description from already-decoded fields.
//
// Every vector argument is copied into the object. The bloom-filter word
// width (c_) is not a constructor parameter: it starts at 0 here and is
// assigned afterwards by the parser.
GnuHash::GnuHash(uint32_t symbol_idx,
    uint32_t shift2,
    const std::vector<uint64_t>& bloom_filters,
    const std::vector<uint32_t>& buckets,
    const std::vector<uint32_t>& hash_values) :
  symbol_index_(symbol_idx),
  shift2_(shift2),
  bloom_filters_(bloom_filters),
  buckets_(buckets),
  hash_values_(hash_values),
  c_(0)
{
}
@ -64,6 +80,40 @@ const std::vector<uint32_t>& GnuHash::hash_values(void) const {
return this->hash_values_;
}
// Check whether `hash` passes the bloom filter.
//
// Two bit positions are derived from the hash (one from the hash itself, one
// from the hash shifted right by shift2()). Both bits must be set in the
// filter word selected by the hash for the check to pass.
//
// Returns false when the filter rejects the hash (or when there is no filter
// word at all), true otherwise.
//
// Degenerate states are handled explicitly instead of dividing by zero:
//   - no filter word (maskwords() == 0): nothing can pass, return false.
//   - unknown word width (c_ == 0, the value both constructors leave behind
//     until the parser assigns it): the filter cannot be evaluated, so the
//     hash is NOT rejected (return true). A bloom filter may report false
//     positives but must never report a false negative.
bool GnuHash::check_bloom_filter(uint32_t hash) const {
  const auto nwords = this->maskwords();
  if (nwords == 0) {
    return false;
  }

  const size_t C = this->c_;
  if (C == 0) {
    return true;
  }

  // Shifting a 32-bit value by 32 or more is undefined behaviour; shift2 is
  // read from the binary, so treat an oversized shift as "all bits shifted out".
  const auto shift = this->shift2();
  const uint32_t h1 = hash;
  const uint32_t h2 = (shift < 32u) ? (hash >> shift) : 0u;

  // Index of the filter word, then the two bit positions inside that word.
  const uint32_t n1 = (h1 / C) % nwords;
  const uint32_t b1 = h1 % C;
  const uint32_t b2 = h2 % C;

  const uint64_t filter = this->bloom_filters()[n1];
  return (filter >> b1) & (filter >> b2) & 1;
}
// Check whether `hash` passes the bucket filter.
//
// The hash selects a bucket (hash modulo the number of buckets); the check
// passes when that bucket holds a non-zero value. With no bucket at all
// the check fails instead of computing a modulo by zero.
bool GnuHash::check_bucket(uint32_t hash) const {
  const auto nbuckets = this->nb_buckets();
  if (nbuckets == 0) {
    return false;
  }
  return this->buckets()[hash % nbuckets] > 0;
}
// Name-based overload: hash the symbol name with dl_new_hash() and forward
// the result to the hash-based check().
bool GnuHash::check(const std::string& symbol_name) const {
  return this->check(dl_new_hash(symbol_name.c_str()));
}
// Hash-based overload: the hash must pass the bloom filter first and then
// the bucket filter. The bucket filter is only consulted when the bloom
// filter accepted the hash.
bool GnuHash::check(uint32_t hash) const {
  return this->check_bloom_filter(hash) && this->check_bucket(hash);
}
bool GnuHash::operator==(const GnuHash& rhs) const {
size_t hash_lhs = Hash::hash(*this);

View File

@ -649,7 +649,7 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
const uint32_t nbuckets = header[0];
const uint32_t symndx = header[1];
const uint32_t maskwords = header[2];
//const uint32_t shift2 = header[3];
const uint32_t shift2 = header[3];
if (maskwords & (maskwords - 1)) {
LOG(WARNING) << "maskwords is not a power of 2";
@ -692,21 +692,30 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
return 0;
}
nb_symbols = std::max(nb_symbols, symndx);
nb_symbols = symndx;
const uint32_t* hash_values = reinterpret_cast<const uint32_t*>(
this->stream_->read(current_offset, nb_symbols * sizeof(uint32_t)));
GnuHash gnuhash{symndx, shift2, bloom_filters, buckets};
gnuhash.c_ = sizeof(uint__) * 8;
// "It is set to 1 when a symbol is the last symbol in a given hash chain"
while (((*hash_values) & 1) == 0) {
++nb_symbols;
++hash_values;
// Register the size of symbols stored in the buckets
std::vector<size_t> nbsym_buckets(nbuckets, 0);
for (size_t i = 0; i < nbuckets; ++i) {
uint32_t hash_value = 0;
size_t nsyms = 0;
do {
hash_value = this->stream_->read_integer<uint32_t>(current_offset);
current_offset += sizeof(uint32_t);
nsyms++;
} while ((hash_value & 1) == 0); // "It is set to 1 when a symbol is the last symbol in a given hash bucket"
nbsym_buckets[i] = buckets[i] + nsyms;
}
return ++nb_symbols;
nb_symbols = std::max<uint32_t>(nb_symbols, *std::max_element(std::begin(nbsym_buckets), std::end(nbsym_buckets)));
return nb_symbols;
}
template<typename ELF_T>
@ -897,6 +906,7 @@ void Parser::parse_dynamic_symbols(uint64_t offset) {
VLOG(VDEBUG) << "[+] Parsing dynamics symbols";
uint32_t nb_symbols = this->get_numberof_dynamic_symbols<ELF_T>(this->count_mtd_);
VLOG(VDEBUG) << "Number of symbols counted: " << nb_symbols;
const Elf_Off dynamic_symbols_offset = offset;
const Elf_Off string_offset = this->get_dynamic_string_table();
@ -1494,6 +1504,7 @@ void Parser::parse_symbol_gnu_hash(uint64_t offset) {
VLOG(VDEBUG) << "[+] Parser symbol GNU hash";
GnuHash gnuhash;
gnuhash.c_ = sizeof(uint__) * 8;
uint64_t current_offset = offset;

View File

@ -76,6 +76,13 @@ class TestELF(TestCase):
0x12F7C433, 0xEB01FAB6, 0xECD54543, 0xAD3C9892, 0x72632CCF, 0x12F7A2B3, 0x7C92E3BB, 0x7C96F087]
self.assertEqual(hash_values, hash_values_test)
#for s in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]:
# print(gnu_hash.check(s.name), s.name)
self.assertTrue(all(gnu_hash.check(x.name) for x in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]))
self.assertFalse(gnu_hash.check("foofdsfdsfds"))
self.assertFalse(gnu_hash.check("fazertrvkdfsrezklqpfjeopqdi"))
def test_permutation(self):
samples = [
"ELF/ELF64_x86-64_binary_ls.bin",