Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into atomic_vectorization
Browse files Browse the repository at this point in the history
  • Loading branch information
abadams committed Jun 11, 2020
2 parents 2d3216d + 5c52122 commit 9257ab4
Show file tree
Hide file tree
Showing 23 changed files with 360 additions and 208 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ GENERATOR_AOTWASM_TESTS := $(filter-out generator_aotwasm_memory_profiler_mandel
test_aotwasm_generator: $(GENERATOR_AOTWASM_TESTS)

# This is just a test to ensure that RunGen builds and links for a critical mass of Generators;
# not all will work directly (e.g. due to missing define_externs at link time), so we blacklist
# not all will work directly (e.g. due to missing define_externs at link time), so we disable
# those known to be broken for plausible reasons.
GENERATOR_BUILD_RUNGEN_TESTS = $(GENERATOR_EXTERNAL_TEST_GENERATOR:$(ROOT_DIR)/test/generator/%_generator.cpp=$(FILTERS_DIR)/%.rungen)
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/async_parallel.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary
For a 32-bit build, run:

```
D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x86
D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x86_amd64
```

#### Managing dependencies with vcpkg
Expand Down
4 changes: 2 additions & 2 deletions README_webassembly.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Note that while some of these limitations may be improved in the future, some
are effectively intrinsic to the nature of this problem. Realistically, this JIT
implementation is intended solely for running Halide self-tests (and even then,
a number of them are fundamentally impractical to support in a hosted-Wasm
environment and are blacklisted).
environment and are disabled).

In sum: don't plan on using Halide JIT mode with Wasm unless you are working on
the Halide library itself.
Expand Down Expand Up @@ -155,7 +155,7 @@ will), you need to install Emscripten and a shell for running wasm+js code
- To run the AOT tests, set `HL_TARGET=wasm-32-wasmrt` and build the
`test_aotwasm_generator` target. (Note that the normal AOT tests won't run
usefully with this target, as extra logic to run under a wasm-enabled shell is
required, and some tests are blacklisted.)
required, and some tests are disabled.)

# Running benchmarks

Expand Down
15 changes: 13 additions & 2 deletions apps/camera_pipe/camera_pipe_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,22 @@ void CameraPipe::generate() {

Var xi, yi, xii, xio;

/* 1391us on a gtx 980. */
/* These tile factors obtain 1391us on a gtx 980. */
int tile_x = 28;
int tile_y = 12;

if (get_target().has_feature(Target::D3D12Compute)) {
// D3D12 SM 5.1 can only utilize a limited amount of
// shared memory, so we use a slightly smaller
// tile size.
tile_x = 20;
tile_y = 12;
}

processed.compute_root()
.reorder(c, x, y)
.unroll(x, 2)
.gpu_tile(x, y, xi, yi, 28, 12);
.gpu_tile(x, y, xi, yi, tile_x, tile_y);

curved.compute_at(processed, x)
.unroll(x, 2)
Expand Down
7 changes: 4 additions & 3 deletions apps/hist/hist_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,11 @@ class Hist : public Halide::Generator<Hist> {
// along the z dimension.
hist_rows.update().gpu_tile(x, y, xi, yi, 32, 8);

if (!get_target().has_feature(Target::Metal)) {
if (!get_target().has_feature(Target::Metal) &&
!get_target().has_feature(Target::D3D12Compute)) {
// bound_extent doesn't currently work inside
// metal kernels because we can't compile the
// assertion. For metal we just inline the
// metal & d3d12compute kernels because we can't compile the
// assertion. For metal & d3d12compute we just inline the
// luma computation.
Y.clone_in(intm)
.compute_at(intm.in(), y)
Expand Down
4 changes: 3 additions & 1 deletion cmake/HalideGeneratorHelpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ function(add_halide_library TARGET)
# On Linux, RPATH allows the generator to find Halide, but we need to add it to the PATH on Windows.
set(generatorCommand ${ARG_FROM})
if (WIN32)
set(generatorCommand ${CMAKE_COMMAND} -E env "PATH=$<SHELL_PATH:$<TARGET_FILE_DIR:Halide::Halide>>" "$<TARGET_FILE:${ARG_FROM}>")
set(newPath "$<TARGET_FILE_DIR:Halide::Halide>" $ENV{PATH})
string(REPLACE ";" "$<SEMICOLON>" newPath "${newPath}")
set(generatorCommand ${CMAKE_COMMAND} -E env "PATH=$<SHELL_PATH:${newPath}>" "$<TARGET_FILE:${ARG_FROM}>")
endif ()

# The output file name might not match the host when cross compiling.
Expand Down
2 changes: 1 addition & 1 deletion cmake/HalideTestHelpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ if (NOT TARGET Halide::Test)
add_library(Halide::Test ALIAS Halide_test)

# Obviously, link to the main library
target_link_libraries(Halide_test INTERFACE Halide::Halide)
target_link_libraries(Halide_test INTERFACE Halide::Halide Threads::Threads)

# Everyone gets to see the common headers
target_include_directories(Halide_test
Expand Down
16 changes: 13 additions & 3 deletions dependencies/llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,23 @@ endif ()

##
# Finish setting up llvm library
#
# Ideally, we would use llvm_config (instead of hardcoding LLVM lib name below):
# if (LLVM_USE_SHARED_LLVM_LIBRARY)
# set(LLVM_USE_SHARED "USE_SHARED")
# endif()
# llvm_config(Halide_LLVM ${LLVM_USE_SHARED} ${LLVM_COMPONENTS})
# However, llvm_config (LLVM 10.0) does not accept INTERFACE_LIBRARY targets,
# so the below code does what llvm_config() does, with the slight difference
# that we link exclusively to the shared library without fallback to static
# libraries for symbols not resolved by the shared library.
##

if (LLVM_USE_SHARED_LLVM_LIBRARY)
set(LLVM_USE_SHARED "USE_SHARED")
set(LLVM_LIBNAMES LLVM)
else ()
llvm_map_components_to_libnames(LLVM_LIBNAMES ${LLVM_COMPONENTS})
endif ()

llvm_map_components_to_libnames(LLVM_LIBNAMES ${LLVM_COMPONENTS})
target_link_libraries(Halide_LLVM INTERFACE ${LLVM_LIBNAMES})

##
Expand Down
80 changes: 70 additions & 10 deletions src/CodeGen_D3D12Compute_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -896,22 +896,82 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s,
constants[i].size += constants[i - 1].size;
}

// Find all the shared allocations and declare them at global scope.
class FindSharedAllocations : public IRVisitor {
using IRVisitor::visit;
void visit(const Allocate *op) override {
op->body.accept(this);
// Find all the shared allocations, uniquify their names,
// and declare them at global scope.
class FindSharedAllocationsAndUniquify : public IRMutator {
using IRMutator::visit;
Stmt visit(const Allocate *op) override {
if (is_shared_allocation(op)) {
allocs.push_back(op);
// Because these will go in global scope,
// we need to ensure they have unique names.
std::string new_name = unique_name(op->name);
replacements[op->name] = new_name;

std::vector<Expr> new_extents;
for (size_t i = 0; i < op->extents.size(); i++) {
new_extents.push_back(mutate(op->extents[i]));
}
Stmt new_body = mutate(op->body);
Expr new_condition = mutate(op->condition);
Expr new_new_expr;
if (op->new_expr.defined()) {
new_new_expr = mutate(op->new_expr);
}

Stmt new_alloc = Allocate::make(new_name, op->type, op->memory_type, new_extents,
std::move(new_condition), std::move(new_body),
std::move(new_new_expr), op->free_function);

allocs.push_back(new_alloc);
replacements.erase(op->name);
return new_alloc;
} else {
return IRMutator::visit(op);
}
}

Stmt visit(const Free *op) override {
    // A Free whose buffer name has no entry in `replacements` is handled
    // by the default mutator.
    const auto renamed = replacements.find(op->name);
    if (renamed == replacements.end()) {
        return IRMutator::visit(op);
    }
    // Otherwise re-emit the Free under the replacement name.
    return Free::make(renamed->second);
}

Expr visit(const Load *op) override {
    // A Load whose buffer name has no entry in `replacements` is handled
    // by the default mutator.
    const auto renamed = replacements.find(op->name);
    if (renamed == replacements.end()) {
        return IRMutator::visit(op);
    }
    // Otherwise rebuild the Load against the replacement name. The index
    // and predicate are still mutated; every other field is carried over.
    return Load::make(op->type, renamed->second,
                      mutate(op->index), op->image, op->param,
                      mutate(op->predicate), op->alignment);
}

Stmt visit(const Store *op) override {
    // A Store whose buffer name has no entry in `replacements` is handled
    // by the default mutator.
    const auto renamed = replacements.find(op->name);
    if (renamed == replacements.end()) {
        return IRMutator::visit(op);
    }
    // Otherwise rebuild the Store against the replacement name. The value,
    // index and predicate are still mutated; the param and alignment are
    // carried over unchanged.
    return Store::make(renamed->second, mutate(op->value),
                       mutate(op->index), op->param,
                       mutate(op->predicate), op->alignment);
}

std::map<string, string> replacements;
friend class CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C;
vector<const Allocate *> allocs;
vector<Stmt> allocs;
};
FindSharedAllocations fsa;
s.accept(&fsa);

FindSharedAllocationsAndUniquify fsa;
s = fsa.mutate(s);

uint32_t total_shared_bytes = 0;
for (const Allocate *op : fsa.allocs) {
for (Stmt sop : fsa.allocs) {
const Allocate *op = sop.as<Allocate>();
internal_assert(op->extents.size() == 1);
internal_assert(op->type.lanes() == 1);
// In D3D12/HLSL, only 32bit types (int/uint/float) are supported (even
Expand Down
Loading

0 comments on commit 9257ab4

Please sign in to comment.