Skip to content

Commit

Permalink
Correctly Fill Auxiliary Vector (#300)
Browse files Browse the repository at this point in the history
This pull request fills the auxiliary vector with values for AT_PHDR, AT_PHENT, AT_PHNUM and AT_ENTRY, and makes minor cleanups to Elf.hh and Elf.cc.

Without this, programs compiled targeting RISC-V with gcc would not execute correctly as they did not know where the program headers were located.
  • Loading branch information
dANW34V3R authored Mar 28, 2023
1 parent bfac331 commit 31681a9
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 42 deletions.
73 changes: 65 additions & 8 deletions src/include/simeng/Elf.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,85 @@ const char Format32 = 1;
const char Format64 = 2;
} // namespace ElfBitFormat

struct ElfHeader {
uint32_t type;
uint64_t offset;
uint64_t virtualAddress;
uint64_t physicalAddress;
uint64_t fileSize;
uint64_t memorySize;
// Elf64_Phdr as described in the elf man page. Contains only the fields that
// are relevant to SimEng

// An executable or shared object file's program header table is an array of
// structures, each describing a segment or other information the system needs
// to prepare the program for execution. An object file segment contains one or
// more sections. Program headers are meaningful only for executable and shared
// object files. A file specifies its own program header size with the ELF
// header's e_phentsize and the number of headers with e_phnum members. The ELF
// program header is described by the type Elf32_Phdr or Elf64_Phdr depending on
// the architecture

/** A reduced ELF64 program header holding only the fields relevant to SimEng.
 * Each instance describes one entry of the program header table. */
struct Elf64_Phdr {
  /** Kind of segment this entry describes, or how the entry's information is
   * to be interpreted. */
  uint32_t p_type;

  /** Offset, from the beginning of the file, of the segment's first byte. */
  uint64_t p_offset;

  /** Virtual address at which the segment's first byte resides in memory. */
  uint64_t p_vaddr;

  /** Reserved for the segment's physical address on systems where physical
   * addressing is relevant. */
  uint64_t p_paddr;

  /** Number of bytes in the file image of the segment; may be zero. */
  uint64_t p_filesz;

  /** Number of bytes in the memory image of the segment; may be zero. */
  uint64_t p_memsz;
};

/** A processed Executable and Linkable Format (ELF) file. */
class Elf {
 public:
  /** Processes the ELF file found at `path`. The file contents of each LOAD
   * segment are read into the buffer referenced by `*imagePointer`, offset by
   * the segment's virtual address. Processing stops early if the ELF magic
   * does not match or the file is not in the 64-bit format. */
  Elf(std::string path, char** imagePointer);
  ~Elf();

  /** Returns the process image size */
  uint64_t getProcessImageSize() const;

  /** Returns if this ELF is valid */
  bool isValid() const;

  /** Returns the virtual address to which the system first transfers
   * control */
  uint64_t getEntryPoint() const;

  /** Returns the virtual address of the program header table */
  uint64_t getPhdrTableAddress() const;

  /** Returns the size of a program header entry */
  uint64_t getPhdrEntrySize() const;

  /** Returns the number of program headers */
  uint64_t getNumPhdr() const;

 private:
  // Every scalar member carries a default initialiser: the constructor can
  // return before assigning them (bad magic, unsupported bit format), and the
  // getters above must not read indeterminate values in that case.

  /** The entry point of the program */
  uint64_t entryPoint_ = 0;

  // NOTE(review): `ElfHeader` looks like the pre-rename name of `Elf64_Phdr`,
  // so this member may be a leftover of that rename — confirm it is still
  // required.
  std::vector<ElfHeader> headers_;

  /** A vector holding each of the program headers extracted from the ELF */
  std::vector<Elf64_Phdr> pheaders_;

  /** The program header entry size stored in the ELF header */
  uint16_t e_phentsize_ = 0;

  /** The number of entries in the program header table stored in the ELF
   * header */
  uint16_t e_phnum_ = 0;

  /** Virtual address of the program header table */
  uint64_t phdrTableAddress_ = 0;

  /** Holds whether this ELF is valid for SimEng */
  bool isValid_ = false;

  /** The size of the process image */
  uint64_t processImageSize_ = 0;
};

Expand Down
33 changes: 33 additions & 0 deletions src/include/simeng/kernel/LinuxProcess.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,30 @@
namespace simeng {
namespace kernel {

namespace auxVec {
/** Keys labelling the entries of the auxiliary vector. */
enum labels {
  AT_NULL = 0,       ///< End of vector
  AT_IGNORE = 1,     ///< Entry should be ignored
  AT_EXECFD = 2,     ///< File descriptor of program
  AT_PHDR = 3,       ///< Program headers for program
  AT_PHENT = 4,      ///< Size of program header entry
  AT_PHNUM = 5,      ///< Number of program headers
  AT_PAGESZ = 6,     ///< System page size
  AT_BASE = 7,       ///< Base address of interpreter
  AT_FLAGS = 8,      ///< Flags
  AT_ENTRY = 9,      ///< Entry point of program
  AT_NOTELF = 10,    ///< Program is not ELF
  AT_UID = 11,       ///< Real uid
  AT_EUID = 12,      ///< Effective uid
  AT_GID = 13,       ///< Real gid
  AT_EGID = 14,      ///< Effective gid
  AT_PLATFORM = 15,  ///< String identifying CPU for optimizations
  AT_HWCAP = 16,     ///< Arch dependent hints at CPU capabilities
  AT_CLKTCK = 17     ///< Frequency at which times() increments
};
}  // namespace auxVec

/** Align `value` to a `boundary`-byte boundary by rounding up to the nearest
 * multiple of `boundary`. */
uint64_t alignToBoundary(uint64_t value, uint64_t boundary);
Expand Down Expand Up @@ -90,6 +114,15 @@ class LinuxProcess {
/** The entry point of the process. */
uint64_t entryPoint_ = 0;

/** Program header table virtual address */
uint64_t progHeaderTableAddress_ = 0;

/** Number of program headers */
uint64_t numProgHeaders_ = 0;

/** Size of program header entry */
uint64_t progHeaderEntSize_ = 0;

/** The address of the start of the heap region. */
uint64_t heapStart_;

Expand Down
83 changes: 51 additions & 32 deletions src/lib/Elf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <cstring>
#include <fstream>
#include <iostream>

namespace simeng {

Expand Down Expand Up @@ -36,6 +37,7 @@ Elf::Elf(std::string path, char** imagePointer) {
char fileMagic[4];
file.read(fileMagic, 4);
if (std::memcmp(elfMagic, fileMagic, sizeof(elfMagic))) {
std::cerr << "[SimEng:Elf] Elf magic does not match" << std::endl;
return;
}

Expand All @@ -48,6 +50,8 @@ Elf::Elf(std::string path, char** imagePointer) {
char bitFormat;
file.read(&bitFormat, sizeof(bitFormat));
if (bitFormat != ElfBitFormat::Format64) {
std::cerr << "[SimEng:Elf] Unsupported architecture detected in Elf"
<< std::endl;
return;
}

Expand All @@ -73,37 +77,38 @@ Elf::Elf(std::string path, char** imagePointer) {
*/

// Seek to the byte representing the start of the header offset table.
uint64_t headerOffset;
file.read(reinterpret_cast<char*>(&headerOffset), sizeof(headerOffset));
// Holds the program header table's file offset in bytes. If the file has no
// program header table, this member holds zero
uint64_t e_phoff = 0;
file.read(reinterpret_cast<char*>(&e_phoff), sizeof(e_phoff));

/**
* Starting 54th byte of the ELF Header a 16-bit value indicates
* the size of each entry in the ELF Program header. In the `elf64_hdr`
* struct this value maps to the member `Elf64_Half e_phentsize`. All
* header entries have the same size.
* Starting from the 56th byte a 16-bit value represents the number
* of header entries in the ELF Program header. In the `elf64_hdr`
* struct this value maps to `Elf64_Half e_phnum`.
* Starting from the 54th byte of the ELF Header a 16-bit value indicates the
* size in bytes of one entry in the file's program header table; all entries
* are the same size. In the `elf64_hdr` struct this value maps to the member
* `Elf64_Half e_phentsize`.
*/

// Seek to the byte representing header entry size.
file.seekg(0x36);
uint16_t headerEntrySize;
file.read(reinterpret_cast<char*>(&headerEntrySize), sizeof(headerEntrySize));
uint16_t headerEntries;
file.read(reinterpret_cast<char*>(&headerEntries), sizeof(headerEntries));
file.read(reinterpret_cast<char*>(&e_phentsize_), sizeof(e_phentsize_));

/** Starting from the 56th byte a 16-bit value represents the number
* of program header entries in the ELF Program header table. In the
* `elf64_hdr` struct this value maps to `Elf64_Half e_phnum`.
*/
file.read(reinterpret_cast<char*>(&e_phnum_), sizeof(e_phnum_));

// Resize the header to equal the number of header entries.
headers_.resize(headerEntries);
pheaders_.resize(e_phnum_);
processImageSize_ = 0;

// Loop over all headers and extract them.
for (size_t i = 0; i < headerEntries; i++) {
for (size_t i = 0; i < e_phnum_; i++) {
// Since all headers entries have the same size.
// We can extract the nth header using the header offset
// and header entry size.
file.seekg(headerOffset + (i * headerEntrySize));
auto& header = headers_[i];
file.seekg(e_phoff + (i * e_phentsize_));
auto& header = pheaders_[i];

/**
* Like the ELF Header, the ELF Program header is also defined
Expand Down Expand Up @@ -133,22 +138,30 @@ Elf::Elf(std::string path, char** imagePointer) {

// Each address-related field is 8 bytes in a 64-bit ELF file
const int fieldBytes = 8;
file.read(reinterpret_cast<char*>(&(header.type)), sizeof(header.type));
file.read(reinterpret_cast<char*>(&(header.p_type)), sizeof(header.p_type));
file.seekg(4, std::ios::cur); // Skip flags
file.read(reinterpret_cast<char*>(&(header.offset)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.virtualAddress)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.physicalAddress)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.fileSize)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.memorySize)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.p_offset)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.p_vaddr)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.p_paddr)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.p_filesz)), fieldBytes);
file.read(reinterpret_cast<char*>(&(header.p_memsz)), fieldBytes);
// Skip p_align

// To construct the process we look for the largest virtual address and
// add it to the memory size of the header. This way we obtain a very
// large array which can hold data at large virtual address.
// However, this way we end up creating a sparse array, in which most
// of the entries are unused. Also SimEng internally treats these
// of the entries are unused. Also, SimEng internally treats these
// virtual address as physical addresses to index into this large array.
if (header.virtualAddress + header.memorySize > processImageSize_) {
processImageSize_ = header.virtualAddress + header.memorySize;
if (header.p_vaddr + header.p_memsz > processImageSize_) {
processImageSize_ = header.p_vaddr + header.p_memsz;
}

// Determine the virtual address of the header table in memory from
// individual program headers. Used to populate the auxvec
if (header.p_offset <= e_phoff &&
e_phoff < header.p_offset + header.p_filesz) {
phdrTableAddress_ = header.p_vaddr + (e_phoff - header.p_offset);
}
}

Expand All @@ -162,12 +175,12 @@ Elf::Elf(std::string path, char** imagePointer) {
*/

// Process headers; only observe LOAD sections for this basic implementation
for (const auto& header : headers_) {
if (header.type == 1) { // LOAD
file.seekg(header.offset);
// Read `fileSize` bytes from `file` into the appropriate place in process
for (const auto& header : pheaders_) {
if (header.p_type == 1) { // LOAD
file.seekg(header.p_offset);
// Read `p_filesz` bytes from `file` into the appropriate place in process
// memory
file.read(*imagePointer + header.virtualAddress, header.fileSize);
file.read(*imagePointer + header.p_vaddr, header.p_filesz);
}
}

Expand All @@ -183,4 +196,10 @@ uint64_t Elf::getEntryPoint() const { return entryPoint_; }

/** Reports the validity flag recorded for this ELF. */
bool Elf::isValid() const {
  return isValid_;
}

/** Virtual address of the program header table. */
uint64_t Elf::getPhdrTableAddress() const {
  return phdrTableAddress_;
}

/** Size of one program header entry, widened from the stored 16-bit value. */
uint64_t Elf::getPhdrEntrySize() const {
  return static_cast<uint64_t>(e_phentsize_);
}

/** Number of program headers, widened from the stored 16-bit value. */
uint64_t Elf::getNumPhdr() const {
  return static_cast<uint64_t>(e_phnum_);
}

} // namespace simeng
22 changes: 20 additions & 2 deletions src/lib/kernel/LinuxProcess.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ LinuxProcess::LinuxProcess(const std::vector<std::string>& commandLine,

entryPoint_ = elf.getEntryPoint();

progHeaderTableAddress_ = elf.getPhdrTableAddress();
progHeaderEntSize_ = elf.getPhdrEntrySize();
numProgHeaders_ = elf.getNumPhdr();

// Align heap start to a 32-byte boundary
heapStart_ = alignToBoundary(elf.getProcessImageSize(), 32);

Expand Down Expand Up @@ -160,9 +164,23 @@ void LinuxProcess::createStack(char** processImage) {

// ELF auxiliary vector, keys defined in `uapi/linux/auxvec.h`
// TODO: populate remaining auxiliary vector entries
initialStackFrame.push_back(6); // AT_PAGESZ
initialStackFrame.push_back(auxVec::AT_PHDR); // AT_PHDR
initialStackFrame.push_back(progHeaderTableAddress_);

initialStackFrame.push_back(auxVec::AT_PHENT); // AT_PHENT
initialStackFrame.push_back(progHeaderEntSize_);

initialStackFrame.push_back(auxVec::AT_PHNUM); // AT_PHNUM
initialStackFrame.push_back(numProgHeaders_);

initialStackFrame.push_back(auxVec::AT_PAGESZ); // AT_PAGESZ
initialStackFrame.push_back(pageSize_);
initialStackFrame.push_back(0); // null terminator

initialStackFrame.push_back(auxVec::AT_ENTRY); // AT_ENTRY
initialStackFrame.push_back(entryPoint_);

initialStackFrame.push_back(auxVec::AT_NULL); // null terminator
initialStackFrame.push_back(0);

size_t stackFrameSize = initialStackFrame.size() * 8;

Expand Down

0 comments on commit 31681a9

Please sign in to comment.