This repository has been archived by the owner on Dec 1, 2021. It is now read-only.

Well managed file descriptor and mmap-ed memory #454

Merged · 8 commits · Sep 24, 2019
dlk/python/dlk/templates/include/de10_nano.h (17 changes: 2 additions & 15 deletions)
```diff
@@ -24,17 +24,6 @@ namespace de10_nano {
 //
 // TCA
 //
-uint8_t* mapPhysicalMemory(size_t base, size_t size) {
-  int fd = open("/dev/mem", O_RDWR | O_SYNC, 0);
-  if (fd == -1)
-    throw std::system_error(errno, std::generic_category());
-  int rw = PROT_READ | PROT_WRITE;
-  auto* mapped_base = reinterpret_cast<uint8_t*>(mmap(nullptr, size, rw, MAP_SHARED, fd, base));
-  if (mapped_base == MAP_FAILED)
-    throw std::system_error(errno, std::generic_category());
-  return mapped_base;
-}
-
 struct Csr {
   static constexpr uint32_t start = 0;
   static constexpr uint32_t admaInputAddress = 1;
@@ -275,10 +264,8 @@ void RunTCA(unsigned long input_addr, unsigned long output_addr, unsigned long k
 
   unsigned use_threshold = (thresholds_addr != 0) ? 1 : 0;
 
-  static volatile uint32_t* csr = nullptr;
-  if (csr == nullptr) {
-    csr = reinterpret_cast<uint32_t*>(mapPhysicalMemory(HPS_TO_FPGA_LW_BASE, 0xFF));
-  }
+  static MappedMem csr_mmap(HPS_TO_FPGA_LW_BASE, 0xFF);
+  static volatile uint32_t* csr = reinterpret_cast<uint32_t*>(csr_mmap.get());
   auto tileWidth = 32u;
   auto tileHeight = 32u;
   auto p = calcParameters(in_h, in_w, in_c, tileWidth, tileHeight, out_c, k_h, k_w, input_addr, kernel_addr, thresholds_addr, output_addr, use_threshold == 1);
```
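The second hunk swaps a hand-rolled "map once, never unmap" pointer (which also leaked the /dev/mem file descriptor) for a function-local static MappedMem, so the CSR window is mapped on the first RunTCA call and unmapped at program exit. A minimal sketch of that lifetime, using a stand-in resource rather than the real /dev/mem mapping:

```cpp
#include <cstdio>

// Stand-in for MappedMem: acquire in the constructor, release in the
// destructor (illustrative only; no real mmap here).
struct FakeMapping {
  FakeMapping()  { std::puts("mapped (first call only)"); }
  ~FakeMapping() { std::puts("unmapped (during normal program exit)"); }
};

void run_tca_like() {
  // C++11 guarantees this is constructed exactly once, thread-safely,
  // on the first call, and destroyed at program termination.
  static FakeMapping mapping;
  std::puts("using mapping");
}

int main() {
  run_tca_like();  // prints "mapped (first call only)", then "using mapping"
  run_tca_like();  // prints only "using mapping"
}                  // "unmapped ..." prints as statics are destroyed
```

Because initialization of a function-local static is thread-safe since C++11, concurrent first calls to RunTCA cannot double-map the region.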
dlk/python/dlk/templates/include/memdriver.h (149 changes: 34 additions & 115 deletions)
```diff
@@ -21,127 +21,46 @@ limitations under the License.
 #include <stdint.h>
 #include <fcntl.h>
 #include <unistd.h>
-
-class MappedMem
-{
-public:
-  using memtype = volatile void;
-
-  MappedMem(unsigned long g_paddr,
-            uint32_t g_count,
-            uint32_t g_size)
-    : mem(nullptr), aligned_size(0)
-  {
-    memtype* aligned_vaddr;
-    unsigned long aligned_paddr;
-
-    /* Align address to access size */
-    g_paddr &= ~(g_size - 1);
-
-    aligned_paddr = g_paddr & ~(4096 - 1);
-    aligned_size = g_paddr - aligned_paddr + (g_count * g_size);
-    aligned_size = (aligned_size + 4096 - 1) & ~(4096 - 1);
-
-    int fd = -1;
-    if ((fd = open("/dev/mem", O_RDWR, 0)) < 0)
-      return;
-
-    aligned_vaddr = mmap(nullptr,
-                         aligned_size,
-                         PROT_READ | PROT_WRITE,
-                         MAP_SHARED,
-                         fd, aligned_paddr);
-
-    if (aligned_vaddr == MAP_FAILED) {
-      printf("Error mapping address %lx\n", aligned_paddr);
-      return;
-    }
-
-    mem = (memtype *)((uint32_t)aligned_vaddr + (uint32_t)(g_paddr - aligned_paddr));
-    close(fd);
-  }
-
-  ~MappedMem()
-  {
-    if(mem != nullptr)
-      munmap((void*)mem, aligned_size);
-  }
-
-  template<typename T>
-  memtype Write(T data)
-  {
-    T *mem_ptr = (T *) mem;
-    *mem_ptr = data;
-  }
-
-  template<typename T>
-  bool Check(T data)
-  {
-    T *mem_ptr = (T *) mem;
-    return *mem_ptr == data;
-  }
-
-  template<typename T>
-  memtype Read(T &data)
-  {
-    T *mem_ptr = (T *) mem;
-    data = *mem_ptr;
-  }
-
-  template<typename T>
-  memtype Write(const T *data, unsigned int size)
-  {
-    T *mem_ptr = (T *) mem;
-    for(unsigned int i = 0; i < size; i++)
-      *mem_ptr++ = data[i];
-  }
-
-  template<typename T>
-  bool Check(const T *data, unsigned int size)
-  {
-    bool success = true;
-    T *mem_ptr = (T *) mem;
-
-    for(unsigned int i = 0; i < size; i++)
-    {
-      success &= (*mem_ptr++ == data[i]);
-      if(!success)
-        break;
-    }
-
-    return success;
-  }
-
-  template<typename T>
-  memtype Read(T *data, unsigned int size)
-  {
-    // volatile T* _data = data;
-    T *mem_ptr = (T *) mem;
-    for(unsigned int i = 0; i < size; i++)
-      data[i] = *mem_ptr++;
-  }
-
-  memtype* get()
-  {
-    return mem;
-  }
-
-private:
-  MappedMem();
-  MappedMem(const MappedMem &);
-  MappedMem& operator=(const MappedMem &);
-
-private:
-  memtype *mem;
-  uint32_t aligned_size;
-};
+#include <memory>
+#include <system_error>
+
+class FileDescriptor {
+ public:
+  FileDescriptor() : fd(-1) {}
+  FileDescriptor(int fd) : fd(fd) {}
+  ~FileDescriptor() {
+    if (fd >= 0) {
+      close(fd);
+    }
+  }
+  operator int() const { return fd; }
+ private:
+  int fd;
+};
+
+class MappedMem {
+ public:
+  MappedMem(std::size_t base, std::size_t size) : length(0) {
+    FileDescriptor fd(open("/dev/mem", O_RDWR | O_SYNC));
+    if (fd == -1) {
+      return;
+    }
+    int rw = PROT_READ | PROT_WRITE;
+    ptr = mmap(nullptr, size, rw, MAP_SHARED, fd, base);
+    if (ptr == MAP_FAILED) {
+      throw std::system_error(errno, std::generic_category());
+    }
+    length = size;
+  }
+  ~MappedMem() {
+    if (ptr != MAP_FAILED) {
+      munmap(ptr, length);
+    }
+  }
+  void* get() const { return ptr; }
+ private:
+  void* ptr;
+  std::size_t length;
+};
 
 
 #endif
```
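For context, a hedged sketch of how the two new wrappers compose; this caller is not part of the PR, and kBase/kSize are illustrative values rather than real dlk constants. One caveat in the merged code: if open("/dev/mem") fails, the constructor returns before ptr is assigned, so get() would return an indeterminate value; the sketch assumes ptr is additionally default-initialized to MAP_FAILED (a one-line hardening not present in the diff). Mapping /dev/mem also requires root privileges on a typical Linux target.

```cpp
#include <sys/mman.h>   // mmap, munmap, MAP_FAILED
#include <cstdint>
#include <cstdio>
#include "memdriver.h"  // the header patched above

int main() {
  // Illustrative physical window; real callers pass HPS_TO_FPGA_LW_BASE etc.
  constexpr std::size_t kBase = 0xFF200000;
  constexpr std::size_t kSize = 0x1000;

  // The FileDescriptor inside the constructor closes /dev/mem as soon as
  // the mapping exists; the mapping itself stays valid until csr dies.
  MappedMem csr(kBase, kSize);
  if (csr.get() == MAP_FAILED) {  // assumes ptr defaults to MAP_FAILED
    std::fprintf(stderr, "could not map physical memory\n");
    return 1;
  }
  auto* regs = static_cast<volatile std::uint32_t*>(csr.get());
  std::printf("first register: %u\n", static_cast<unsigned>(regs[0]));
  return 0;  // ~MappedMem runs munmap here
}
```

Unlike the class it replaces, this MappedMem does not delete its copy operations, so copying an instance would lead to a double munmap; the call sites in this PR avoid that by keeping each instance static or scope-local and unique.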
dlk/python/dlk/templates/src/network.tpl.cpp (26 changes: 5 additions & 21 deletions)
```diff
@@ -63,27 +63,9 @@ limitations under the License.
 #include "operators.h"
 
 #ifdef RUN_ON_FPGA
-#include <sys/mman.h>
-#include <cstdint>
-#include <fcntl.h>
-#include <unistd.h>
+#include "memdriver.h"
 #endif
 
-namespace {
-
-uint8_t* mapPhysicalMemory(size_t base, size_t size) {
-  int fd = open("/dev/mem", O_RDWR | O_SYNC, 0);
-  if (fd == -1)
-    throw std::system_error(errno, std::generic_category());
-  int rw = PROT_READ | PROT_WRITE;
-  auto* mapped_base = reinterpret_cast<uint8_t*>(mmap(nullptr, size, rw, MAP_SHARED, fd, base));
-  if (mapped_base == MAP_FAILED)
-    throw std::system_error(errno, std::generic_category());
-  return mapped_base;
-}
-
-} // namespace
-
 {% if config.debug -%}
 #include "c2numpy.h"
 
@@ -259,13 +241,15 @@ bool Network::init()
 {{ '\n' -}}
 
 #if defined RUN_ON_FPGA
-  auto* kernel_buffer = mapPhysicalMemory(KERNEL_ADDR, total_kernel_size);
+  MappedMem kernel_mmap(KERNEL_ADDR, total_kernel_size);
+  auto kernel_buffer = reinterpret_cast<uint8_t*>(kernel_mmap.get());
 {% for qconv in graph.convs(quantized_only=True) -%}
 {% set kernel = qconv.input_nodes[1] -%}
 std::memcpy(kernel_buffer + {{qconv.name}}_kernel_offset, {{kernel.name}}.data(), {{qconv.name}}_kernel_size);
 {% endfor -%}
 
-  auto* thresholds_buffer = mapPhysicalMemory(THRESHOLD_ADDR, total_thresholds_size);
+  MappedMem thresholds_mmap(THRESHOLD_ADDR, total_thresholds_size);
+  auto thresholds_buffer = reinterpret_cast<uint8_t*>(thresholds_mmap.get());
 {% for qconv in graph.convs(quantized_only=True) -%}
 {% if qconv.has_thresholds -%}
 {% set thresholds = qconv.thresholds -%}
```
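Since network.tpl.cpp is a Jinja2 template, the {% for %} block renders one memcpy per quantized convolution. A hypothetical rendering for a network with a single quantized layer named conv2 (the layer name and offsets are illustrative, and the per-layer thresholds copy is truncated in the diff above):

```cpp
// Hypothetical expansion of the template for one layer "conv2".
MappedMem kernel_mmap(KERNEL_ADDR, total_kernel_size);
auto kernel_buffer = reinterpret_cast<uint8_t*>(kernel_mmap.get());
std::memcpy(kernel_buffer + conv2_kernel_offset, conv2_kernel.data(), conv2_kernel_size);

MappedMem thresholds_mmap(THRESHOLD_ADDR, total_thresholds_size);
auto thresholds_buffer = reinterpret_cast<uint8_t*>(thresholds_mmap.get());
// per-layer threshold copies follow the same pattern (truncated in the diff)
```

Both MappedMem objects are locals of Network::init(), so the virtual mappings are torn down when init() returns. That is safe here: the memcpy calls write through the MAP_SHARED mapping into the physical KERNEL_ADDR/THRESHOLD_ADDR windows, and the data persists there for the FPGA after the CPU-side mapping is gone.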