From 31681a90cd2fd8cc3ea91e88ba81ea0d85f477be Mon Sep 17 00:00:00 2001 From: Dan Weaver <31134699+dANW34V3R@users.noreply.github.com> Date: Tue, 28 Mar 2023 14:38:11 +0100 Subject: [PATCH] Correctly Fill Auxiliary Vector (#300) This pull request fills the auxiliary vector with values for AT_PHDR, AT_PHENT, AT_PHNUM and AT_ENTRY as well as minor cleanup of Elf.hh and Elf.cc. Without this, programs compiled targeting RISC-V with gcc would not execute correctly as they did not know where the program headers were located. --- src/include/simeng/Elf.hh | 73 +++++++++++++++++--- src/include/simeng/kernel/LinuxProcess.hh | 33 +++++++++ src/lib/Elf.cc | 83 ++++++++++++++--------- src/lib/kernel/LinuxProcess.cc | 22 +++++- 4 files changed, 169 insertions(+), 42 deletions(-) diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh index 88e419e6a5..96e9b0f06e 100644 --- a/src/include/simeng/Elf.hh +++ b/src/include/simeng/Elf.hh @@ -12,13 +12,37 @@ const char Format32 = 1; const char Format64 = 2; } // namespace ElfBitFormat -struct ElfHeader { - uint32_t type; - uint64_t offset; - uint64_t virtualAddress; - uint64_t physicalAddress; - uint64_t fileSize; - uint64_t memorySize; +// Elf64_Phdr as described in the elf man page. Only contains SimEng relevant +// information + +// An executable or shared object file's program header table is an array of +// structures, each describing a segment or other information the system needs +// to prepare the program for execution. An object file segment contains one or +// more sections. Program headers are meaningful only for executable and shared +// object files. A file specifies its own program header size with the ELF +// header's e_phentsize and the number of headers with e_phnum members. 
The ELF +// program header is described by the type Elf32_Phdr or Elf64_Phdr depending on +// the architecture + +struct Elf64_Phdr { + // Indicates what kind of segment this array element describes or + // how to interpret the array element's information + uint32_t p_type; + // Holds the offset from the beginning of the file at + // which the first byte of the segment resides + uint64_t p_offset; + // Holds the virtual address at which the first byte of the + // segment resides in memory + uint64_t p_vaddr; + // On systems for which physical addressing is relevant, this + // member is reserved for the segment's physical address + uint64_t p_paddr; + // Holds the number of bytes in the file image of + // the segment. It may be zero + uint64_t p_filesz; + // Holds the number of bytes in the memory image + // of the segment. It may be zero + uint64_t p_memsz; }; /** A processed Executable and Linkable Format (ELF) file. */ @@ -26,14 +50,47 @@ class Elf { public: Elf(std::string path, char** imagePointer); ~Elf(); + + /** Returns the process image size */ uint64_t getProcessImageSize() const; + + /** Returns whether this ELF is valid */ bool isValid() const; + + /** Returns the virtual address to which the system first transfers + * control */ uint64_t getEntryPoint() const; + /** Returns the virtual address of the program header table */ + uint64_t getPhdrTableAddress() const; + + /** Returns the size of a program header entry */ + uint64_t getPhdrEntrySize() const; + + /** Returns the number of program headers */ + uint64_t getNumPhdr() const; + private: + /** The entry point of the program */ uint64_t entryPoint_; - std::vector<ElfHeader> headers_; + + /** A vector holding each of the program headers extracted from the ELF */ + std::vector<Elf64_Phdr> pheaders_; + + /** The program header entry size stored in the ELF header (0 until it has been read from the file) */ + uint16_t e_phentsize_ = 0; + + /** The number of entries in the program header table stored in the ELF header (0 until it has been read from the file) + */ + uint16_t e_phnum_ = 0; + + /** Virtual address of the 
program header table */ + uint64_t phdrTableAddress_ = 0; + + /** Holds whether this ELF is valid for SimEng */ bool isValid_ = false; + + /** The size of the process image */ uint64_t processImageSize_; }; diff --git a/src/include/simeng/kernel/LinuxProcess.hh b/src/include/simeng/kernel/LinuxProcess.hh index 9796b52937..2f13a7727d 100644 --- a/src/include/simeng/kernel/LinuxProcess.hh +++ b/src/include/simeng/kernel/LinuxProcess.hh @@ -8,6 +8,30 @@ namespace simeng { namespace kernel { +namespace auxVec { +// Labels for the entries in the auxiliary vector +enum labels { + AT_NULL = 0, // End of vector + AT_IGNORE = 1, // Entry should be ignored + AT_EXECFD = 2, // File descriptor of program + AT_PHDR = 3, // Program headers for program + AT_PHENT = 4, // Size of program header entry + AT_PHNUM = 5, // Number of program headers + AT_PAGESZ = 6, // System page size + AT_BASE = 7, // Base address of interpreter + AT_FLAGS = 8, // Flags + AT_ENTRY = 9, // Entry point of program + AT_NOTELF = 10, // Program is not ELF + AT_UID = 11, // Real uid + AT_EUID = 12, // Effective uid + AT_GID = 13, // Real gid + AT_EGID = 14, // Effective gid + AT_PLATFORM = 15, // String identifying CPU for optimizations + AT_HWCAP = 16, // Arch dependent hints at CPU capabilities + AT_CLKTCK = 17 // Frequency at which times() increments +}; +} // namespace auxVec + /** Align `address` to an `alignTo`-byte boundary by rounding up to the nearest * multiple. */ uint64_t alignToBoundary(uint64_t value, uint64_t boundary); @@ -90,6 +114,15 @@ class LinuxProcess { /** The entry point of the process. */ uint64_t entryPoint_ = 0; + /** Program header table virtual address */ + uint64_t progHeaderTableAddress_ = 0; + + /** Number of program headers */ + uint64_t numProgHeaders_ = 0; + + /** Size of program header entry */ + uint64_t progHeaderEntSize_ = 0; + /** The address of the start of the heap region. 
*/ uint64_t heapStart_; diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 6654cc86a8..28b138558b 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -2,6 +2,7 @@ #include #include +#include namespace simeng { @@ -36,6 +37,7 @@ Elf::Elf(std::string path, char** imagePointer) { char fileMagic[4]; file.read(fileMagic, 4); if (std::memcmp(elfMagic, fileMagic, sizeof(elfMagic))) { + std::cerr << "[SimEng:Elf] Elf magic does not match" << std::endl; return; } @@ -48,6 +50,8 @@ Elf::Elf(std::string path, char** imagePointer) { char bitFormat; file.read(&bitFormat, sizeof(bitFormat)); if (bitFormat != ElfBitFormat::Format64) { + std::cerr << "[SimEng:Elf] Unsupported architecture detected in Elf" + << std::endl; return; } @@ -73,37 +77,38 @@ Elf::Elf(std::string path, char** imagePointer) { */ // Seek to the byte representing the start of the header offset table. - uint64_t headerOffset; - file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); + // Holds the program header table's file offset in bytes. If the file has no + // program header table, this member holds zero + uint64_t e_phoff = 0; + file.read(reinterpret_cast(&e_phoff), sizeof(e_phoff)); /** - * Starting 54th byte of the ELF Header a 16-bit value indicates - * the size of each entry in the ELF Program header. In the `elf64_hdr` - * struct this value maps to the member `Elf64_Half e_phentsize`. All - * header entries have the same size. - * Starting from the 56th byte a 16-bit value represents the number - * of header entries in the ELF Program header. In the `elf64_hdr` - * struct this value maps to `Elf64_Half e_phnum`. + * Starting from the 54th byte of the ELF Header a 16-bit value indicates the + * size in bytes of one entry in the file's program header table; all entries + * are the same size. In the `elf64_hdr` struct this value maps to the member + * `Elf64_Half e_phentsize`. */ - // Seek to the byte representing header entry size. 
file.seekg(0x36); - uint16_t headerEntrySize; - file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); - uint16_t headerEntries; - file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); + file.read(reinterpret_cast(&e_phentsize_), sizeof(e_phentsize_)); + + /** Starting from the 56th byte a 16-bit value represents the number + * of program header entries in the ELF Program header table. In the + * `elf64_hdr` struct this value maps to `Elf64_Half e_phnum`. + */ + file.read(reinterpret_cast(&e_phnum_), sizeof(e_phnum_)); // Resize the header to equal the number of header entries. - headers_.resize(headerEntries); + pheaders_.resize(e_phnum_); processImageSize_ = 0; // Loop over all headers and extract them. - for (size_t i = 0; i < headerEntries; i++) { + for (size_t i = 0; i < e_phnum_; i++) { // Since all headers entries have the same size. // We can extract the nth header using the header offset // and header entry size. - file.seekg(headerOffset + (i * headerEntrySize)); - auto& header = headers_[i]; + file.seekg(e_phoff + (i * e_phentsize_)); + auto& header = pheaders_[i]; /** * Like the ELF Header, the ELF Program header is also defined @@ -133,22 +138,30 @@ Elf::Elf(std::string path, char** imagePointer) { // Each address-related field is 8 bytes in a 64-bit ELF file const int fieldBytes = 8; - file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); + file.read(reinterpret_cast(&(header.p_type)), sizeof(header.p_type)); file.seekg(4, std::ios::cur); // Skip flags - file.read(reinterpret_cast(&(header.offset)), fieldBytes); - file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); - file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); + file.read(reinterpret_cast(&(header.p_offset)), fieldBytes); + file.read(reinterpret_cast(&(header.p_vaddr)), fieldBytes); + 
file.read(reinterpret_cast(&(header.p_paddr)), fieldBytes); + file.read(reinterpret_cast(&(header.p_filesz)), fieldBytes); + file.read(reinterpret_cast(&(header.p_memsz)), fieldBytes); + // Skip p_align // To construct the process we look for the largest virtual address and // add it to the memory size of the header. This way we obtain a very // large array which can hold data at large virtual address. // However, this way we end up creating a sparse array, in which most - // of the entries are unused. Also SimEng internally treats these + // of the entries are unused. Also, SimEng internally treats these // virtual address as physical addresses to index into this large array. - if (header.virtualAddress + header.memorySize > processImageSize_) { - processImageSize_ = header.virtualAddress + header.memorySize; + if (header.p_vaddr + header.p_memsz > processImageSize_) { + processImageSize_ = header.p_vaddr + header.p_memsz; + } + + // Determine the virtual address of the header table in memory from + // individual program headers. 
Used to populate the auxvec + if (header.p_offset <= e_phoff && + e_phoff < header.p_offset + header.p_filesz) { + phdrTableAddress_ = header.p_vaddr + (e_phoff - header.p_offset); } } @@ -162,12 +175,12 @@ Elf::Elf(std::string path, char** imagePointer) { */ // Process headers; only observe LOAD sections for this basic implementation - for (const auto& header : headers_) { - if (header.type == 1) { // LOAD - file.seekg(header.offset); - // Read `fileSize` bytes from `file` into the appropriate place in process + for (const auto& header : pheaders_) { + if (header.p_type == 1) { // LOAD + file.seekg(header.p_offset); + // Read `p_filesz` bytes from `file` into the appropriate place in process // memory - file.read(*imagePointer + header.virtualAddress, header.fileSize); + file.read(*imagePointer + header.p_vaddr, header.p_filesz); } } @@ -183,4 +196,10 @@ uint64_t Elf::getEntryPoint() const { return entryPoint_; } bool Elf::isValid() const { return isValid_; } +uint64_t Elf::getPhdrTableAddress() const { return phdrTableAddress_; } + +uint64_t Elf::getPhdrEntrySize() const { return e_phentsize_; } + +uint64_t Elf::getNumPhdr() const { return e_phnum_; } + } // namespace simeng diff --git a/src/lib/kernel/LinuxProcess.cc b/src/lib/kernel/LinuxProcess.cc index 31e36d7f48..e3bc57df6f 100644 --- a/src/lib/kernel/LinuxProcess.cc +++ b/src/lib/kernel/LinuxProcess.cc @@ -32,6 +32,10 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, entryPoint_ = elf.getEntryPoint(); + progHeaderTableAddress_ = elf.getPhdrTableAddress(); + progHeaderEntSize_ = elf.getPhdrEntrySize(); + numProgHeaders_ = elf.getNumPhdr(); + // Align heap start to a 32-byte boundary heapStart_ = alignToBoundary(elf.getProcessImageSize(), 32); @@ -160,9 +164,23 @@ void LinuxProcess::createStack(char** processImage) { // ELF auxillary vector, keys defined in `uapi/linux/auxvec.h` // TODO: populate remaining auxillary vector entries - initialStackFrame.push_back(6); // AT_PAGESZ + 
initialStackFrame.push_back(auxVec::AT_PHDR); // AT_PHDR: virtual address of the program header table + initialStackFrame.push_back(progHeaderTableAddress_); + + initialStackFrame.push_back(auxVec::AT_PHENT); // AT_PHENT: size of one program header entry + initialStackFrame.push_back(progHeaderEntSize_); + + initialStackFrame.push_back(auxVec::AT_PHNUM); // AT_PHNUM: number of program headers + initialStackFrame.push_back(numProgHeaders_); + + initialStackFrame.push_back(auxVec::AT_PAGESZ); // AT_PAGESZ: system page size initialStackFrame.push_back(pageSize_); - initialStackFrame.push_back(0); // null terminator + + initialStackFrame.push_back(auxVec::AT_ENTRY); // AT_ENTRY: entry point of the program + initialStackFrame.push_back(entryPoint_); + + initialStackFrame.push_back(auxVec::AT_NULL); // AT_NULL: end-of-vector marker, followed by a zero value + initialStackFrame.push_back(0); size_t stackFrameSize = initialStackFrame.size() * 8;