LIEF/tests/GenerateConfigELF.py

235 lines
8.1 KiB
Python
Raw Normal View History

2017-03-30 16:56:49 +02:00
#!/usr/bin/env python2
#-*- coding: utf-8 -*-
# This file is used to generate the config file for the LIEF tests.
# Basically, it parses the output of readelf.
import re
import yaml
import subprocess
import sys
import hashlib
# Run `readelf -a` on the binary named by the first command-line argument
# and keep everything it printed on stdout for the regex passes below.
readelf_cmd = ["readelf", "-a", sys.argv[1]]
p = subprocess.Popen(readelf_cmd, stdout=subprocess.PIPE)
# stderr is not piped, so `err` is always None here.
output, err = p.communicate()
data = output

# Containers filled in by the sections below and finally dumped as YAML.
binary = {}
header = {}
# Regex tables used to scrape the `readelf -a` dump held in `data`.
#
# Key naming convention visible in the tables:
#   * "entrypoint" .. "nbPhdr": single-value header fields. The labels are
#     matched in French, so readelf is presumably expected to run under a
#     French locale -- TODO confirm.
#   * "sections" / "segments": one match per table row, applied to the whole
#     dump.
#   * "*Glob" patterns: capture a whole table (one capturing group).
#   * "*Loc" patterns: applied to the text captured by the matching "*Glob"
#     pattern to extract the individual rows.
#
# The ur'...' literals are Python 2-only syntax.
#ELF64
elf64_regex = {
"entrypoint" : "Adresse du point d'entrée:\s+0x([0-9a-f]+)",
"sectionoffset" : "Début des en-têtes de section\s*:\s+([0-9]+)",
"offsetToPhdr" : "Début des en-têtes de programme :\s+([0-9]+)",
"nbShdr" : "Nombre d'en-têtes de section\s*:\s+([0-9]+)",
"nbPhdr" : "Nombre d'en-tête du programme\s*:\s+([0-9]+)",
# Section rows here are expected to span two lines (note the literal \n).
"sections" : ur'\[\s*(\d+)\]\s?(\S+|\s*)\s+\S+\s+([0-9A-Fa-f]+)\s+([0-9A-Fa-f]+)\n\s+([0-9A-Fa-f]+)\s+([0-9A-Fa-f]+)',
# Segment rows here are expected to span two lines as well.
"segments" : ur'\s+(\w+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\n\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)',
"dynamicSymGlob": ur'^T(?:.*)\.dynsym.*\n((?:\s{2,}(?:.*)\n)+)',
"dynamicSymLoc" : ur'([0-9]+):\s+[0-9a-fA-F]+\s+[0-9]+\s+\S+\s+\S+\s+\S+\s+\S+\s?([^@\n]*)',
"staticSymGlob" : ur'^T(?:.*)\.symtab.*\n((?:.*\n)+)$\s',
"staticSymLoc" : ur'([0-9]+):\s+[0-9a-fA-F]+\s+[0-9]+\s+\S+\s+\S+\s+\S+\s+\S+ (\S*)[@\n]+',
"dynRelocaGlob" : ur'^S(?:.*)\.rel[a]*\.dyn.*\n((?:.{2,}(?:.*)\n)+)',
"dynRelocaLoc" : ur'^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+\S+\s+([0-9a-fA-F]+)\s*(?:\n|(?:\s+(\S+)))',
"pltRelocaGlob" : ur'^S(?:.*)\.rel[a]*\.plt.*\n((?:.{2,}(?:.*)\n)+)',
"pltRelocaLoc" : ur'^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+\S+\s+([0-9a-fA-F]+)\s*(?:\n|(?:\s+(\S+)))'
}
# NOTE(review): the table below is labelled "ELF32" but is assigned to the
# same name, `elf64_regex`. It therefore replaces the table above, which is
# never used by the rest of the script. Its "sections"/"segments" patterns
# separate columns with \s, which also matches newlines, so they can match
# rows laid out on either one or two lines. Confirm whether the rebinding is
# intentional before renaming either table.
# ELF32
elf64_regex = {
"entrypoint" : "Adresse du point d'entrée:\s+0x([0-9a-f]+)",
"sectionoffset" : "Début des en-têtes de section\s*:\s+([0-9]+)",
"offsetToPhdr" : "Début des en-têtes de programme :\s+([0-9]+)",
"nbShdr" : "Nombre d'en-têtes de section\s*:\s+([0-9]+)",
"nbPhdr" : "Nombre d'en-tête du programme\s*:\s+([0-9]+)",
"sections" : ur'\s+\[\s*([0-9]+)\]\s?(\s|\S+)\s+\S+\s+([0-9A-Fa-f]+)\s+([0-9A-Fa-f]+)\s+([0-9A-Fa-f]+)\s+([0-9A-Fa-f]+)',
"segments" : ur'\s+(\w+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s0x([0-9A-Fa-f]+)\s0x([0-9A-Fa-f]+)\s0x([0-9A-Fa-f]+)',
"dynamicSymGlob": ur'^T(?:.*)\.dynsym.*\n((?:\s{2,}(?:.*)\n)+)',
"dynamicSymLoc" : ur'([0-9]+):\s+[0-9a-fA-F]+\s+[0-9]+\s+\S+\s+\S+\s+\S+\s+\S+\s?([^@\n]*)',
"staticSymGlob" : ur'^T(?:.*)\.symtab.*\n((?:.*\n)+)$\s',
"staticSymLoc" : ur'([0-9]+):\s+[0-9a-fA-F]+\s+[0-9]+\s+\S+\s+\S+\s+\S+\s+\S+ (\S*)[@\n]+',
"dynRelocaGlob" : ur'^S(?:.*)\.rel[a]*\.dyn.*\n((?:.{2,}(?:.*)\n)+)',
# Unlike the first table, the optional symbol-name group stops at '@' or
# end of line instead of at the first whitespace.
"dynRelocaLoc" : ur'^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+\S+\s+([0-9a-fA-F]+)\s*(?:\n|(?:\s+([^@\n]+)))',
"pltRelocaGlob" : ur'^S(?:.*)\.rel[a]*\.plt.*\n((?:.{2,}(?:.*)\n)+)',
"pltRelocaLoc" : ur'^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+\S+\s+([0-9a-fA-F]+)\s*(?:\n|(?:\s+([^@\n]+)))'
}
#
# File info
#
# Record the base name of the analysed binary (last '/'-separated component).
binary["filename"] = str(sys.argv[1]).split("/")[-1]
# Hash the *contents* of the binary. Hashing sys.argv[1] itself would only
# digest the path string, so the value would change with the way the script
# is invoked (relative vs. absolute path) and say nothing about the file.
with open(sys.argv[1], "rb") as binary_file:
    binary["hash"] = hashlib.md5(binary_file.read()).hexdigest()
#path: "@CMAKE_CURRENT_SOURCE_DIR@/samples/ELF/x86-64/binaries/ls"
#
# header
#
# Pull each scalar header field out of the readelf dump. Every pattern has a
# single capturing group; a missing field makes re.search() return None and
# the attribute access below fail.
entrypoint = re.search(elf64_regex["entrypoint"], data).group(1)
sectionoffset = re.search(elf64_regex["sectionoffset"], data).group(1)
offsetToPhdr = re.search(elf64_regex["offsetToPhdr"], data).group(1)
nbShdr = re.search(elf64_regex["nbShdr"], data).group(1)
nbPhdr = re.search(elf64_regex["nbPhdr"], data).group(1)

# The entry point is captured as hexadecimal digits, the rest as decimal.
header.update({
    'entryPoint': int(entrypoint, 16),
    'offsetToShdr': int(sectionoffset),
    'offsetToPhdr': int(offsetToPhdr),
    'nbShdr': int(nbShdr),
    'nbPhdr': int(nbPhdr),
})
#
# Sections
#
# One dict per section row matched in the readelf dump. The pattern yields
# six groups per row; the last one is captured but not exported.
section_regexp = re.compile(elf64_regex["sections"], re.MULTILINE)
sections = section_regexp.findall(data)
sections_yaml = [
    {
        'nb': int(nb),
        'name': '%s' % name.strip(),
        'address': int(address, 16),
        'offset': int(offset, 16),
        'size': int(size, 16),
    }
    for nb, name, address, offset, size, _unused in sections
]
#
# Segments
#
# One dict per segment row matched in the readelf dump. The first captured
# group (the leading word of the row) is not exported; the five remaining
# groups are hexadecimal values.
segment_regexp = re.compile(elf64_regex["segments"], re.MULTILINE)
segments = segment_regexp.findall(data)
segments_yaml = [
    {
        'offset': int(offset, 16),
        'vAddress': int(v_address, 16),
        'pAddress': int(p_address, 16),
        'fSize': int(f_size, 16),
        'vSize': int(v_size, 16),
    }
    for _kind, offset, v_address, p_address, f_size, v_size in segments
]
#
# Relocations
#
#relocations_regexp = re.compile(ur'^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+(.\S+)\s+([0-9a-fA-F]+)\s+(.\S+)', re.MULTILINE)
#relocations = re.findall(relocations_regexp, data)
#relocations_yaml = []
#for relocation in relocations:
# relocation_yaml = {
# 'offset': int(relocation[0], 16),
# 'info' : int(relocation[1], 16),
# 'name' : relocation[4]
# }
# if relocation_yaml not in relocations_yaml:
# relocations_yaml.append(relocation_yaml);
#
# Dynamic symbols
#
# Two-step extraction: first isolate the .dynsym table text, then match the
# individual symbol rows inside it. The result stays empty when the dump has
# no .dynsym table.
extract_regexp = re.compile(elf64_regex["dynamicSymGlob"], re.MULTILINE)
dynsyms_yaml = []
dynsym_tables = extract_regexp.findall(data)
if dynsym_tables:
    # Only the first matching table is used.
    extracted = dynsym_tables[0]
    dynsyms_regexp = re.compile(elf64_regex["dynamicSymLoc"], re.MULTILINE)
    dynsyms = dynsyms_regexp.findall(extracted)
    dynsyms_yaml = [
        {'num': int(num), 'name': name}
        for num, name in dynsyms
    ]
#
# Static symbols
#
# Two-step extraction: first isolate the .symtab table text, then match the
# individual symbol rows inside it. With no .symtab table the row list stays
# empty and so does the exported list.
extract_regexp = re.compile(elf64_regex["staticSymGlob"], re.MULTILINE)
staticsyms = []
extracted = ""
symtab_tables = extract_regexp.findall(data)
if symtab_tables:
    # Only the first matching table is used.
    extracted = symtab_tables[0]
    staticsyms_regexp = re.compile(elf64_regex["staticSymLoc"], re.MULTILINE)
    staticsyms = staticsyms_regexp.findall(extracted)
staticsyms_yaml = [
    {'num': int(num), 'name': name}
    for num, name in staticsyms
]
#
# Dynamic Relocations
#
# Two-step extraction: first isolate the .rel(a).dyn table text, then match
# the individual relocation rows inside it. The result stays empty when the
# dump has no such table.
extract_regexp = re.compile(elf64_regex["dynRelocaGlob"], re.MULTILINE)
relocations_dyn_yaml = []
dyn_reloc_tables = extract_regexp.findall(data)
if dyn_reloc_tables:
    # Only the first matching table is used.
    extracted = dyn_reloc_tables[0]
    regexp = re.compile(elf64_regex["dynRelocaLoc"], re.MULTILINE)
    relocations = regexp.findall(extracted)
    # The name group is optional in the pattern and is '' when absent.
    relocations_dyn_yaml = [
        {
            'offset': int(offset, 16),
            'info': int(info, 16),
            'value': int(value, 16),
            'name': name,
        }
        for offset, info, value, name in relocations
    ]
#
# .plt.got relocations
#
# Two-step extraction: first isolate the .rel(a).plt table text, then match
# the individual relocation rows inside it.
extract_regexp = re.compile(elf64_regex["pltRelocaGlob"], re.MULTILINE)
relocations_plt_yaml = []
plt_reloc_tables = re.findall(extract_regexp, data)
# Guard against dumps without a .rel(a).plt table: indexing [0] on an empty
# match list would raise IndexError. The list then simply stays empty, as it
# does for the other optional tables in this script.
if plt_reloc_tables:
    # Only the first matching table is used.
    extracted = plt_reloc_tables[0]
    regexp = re.compile(elf64_regex["pltRelocaLoc"], re.MULTILINE)
    relocations = re.findall(regexp, extracted)
    for reloc in relocations:
        # reloc[3] (the name) is optional in the pattern and '' when absent.
        relocation_yaml = {
            'offset': int(reloc[0], 16),
            'info': int(reloc[1], 16),
            'value': int(reloc[2], 16),
            'name': reloc[3]
        }
        relocations_plt_yaml.append(relocation_yaml)
# Assemble the final document. Header, sections and segments are always
# emitted; the symbol and relocation tables only when they are non-empty.
binary['Header'] = header
binary['Sections'] = sections_yaml
binary['Segments'] = segments_yaml
#binary['Relocations'] = relocations_yaml
if relocations_plt_yaml:
    binary['PltGotReloc'] = relocations_plt_yaml
if dynsyms_yaml:
    binary['DynamicSymbols'] = dynsyms_yaml
if relocations_dyn_yaml:
    binary['DynamicReloc'] = relocations_dyn_yaml
if staticsyms_yaml:
    binary['StaticSymbols'] = staticsyms_yaml

# Write "<filename>.yaml" in the current working directory. The context
# manager closes the file even if yaml.dump() raises.
with open(binary["filename"] + ".yaml", "w") as output:
    yaml.dump(binary, stream=output)