Skip to content

Commit

Permalink
Remove OpenGL support (part 1)
Browse files Browse the repository at this point in the history
Fixes #5475

This removes the OpenGL backend (but *not* the OpenGLCompute backend) from public use:

- Remove Target::OpenGL
- Remove DeviceAPI::GLSL
- Remove Func::glsl() and Func::shader()
- Remove all OpenGL-specific apps and tests
- Remove HalideRuntimeOpenGL.h
- Remove some internal code that is OpenGL-only

Note that there is still internal code that needs trimming. Since the OpenGLCompute backend shares some of the same code, build dependencies, and runtime shared-library loading, I erred on the side of leaving code, build rules, etc. in place for now, with the plan to clean that up in subsequent PRs.

Note also that the Target::EGL feature is still present, as I believe it is still useful in conjunction with OpenGLCompute.
  • Loading branch information
steven-johnson committed Jan 9, 2021
1 parent cc03c9c commit a4fbbe8
Show file tree
Hide file tree
Showing 125 changed files with 61 additions and 7,872 deletions.
1 change: 0 additions & 1 deletion .github/workflows/presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,5 @@ jobs:
(cd test/error && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
(cd test/generator && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
(cd test/failing_with_issue && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
(cd test/opengl && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
(cd test/performance && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
(cd test/warning && comm -23 <(ls *.{c,cpp} | sort) <(grep -P '^\s*#?\s*[A-Za-z0-9_.]+$' CMakeLists.txt | tr -d '# ' | sort) | tee missing_files && [ ! -s missing_files ])
18 changes: 2 additions & 16 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,6 @@ jobs:
libpng-dev \
ninja-build
# TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)...
sudo apt-get install \
freeglut3-dev \
libglu1-mesa-dev \
mesa-common-dev
- name: Configure MacOS Host
if: startsWith(matrix.host_os, 'macos')
shell: bash
Expand Down Expand Up @@ -193,12 +187,6 @@ jobs:
libjpeg-dev:i386 \
libpng-dev:i386 \
# TODO(srj): OpenGL is only needed to build the opengl tests (which we don't even run)...
sudo apt-get install \
freeglut3-dev:i386 \
libglu1-mesa-dev:i386 \
mesa-common-dev:i386
- name: Configure Arm32 Crosscompilation
if: matrix.target_os == 'linux' && matrix.target_arch == 'arm' && matrix.target_bits == 32
shell: bash
Expand Down Expand Up @@ -449,9 +437,8 @@ jobs:
TEST_GROUPS_SERIAL="tutorial"
# performance is never going to be reliable on VMs.
# opengl won't work on the buildbots.
# auto_schedule is just flaky.
TEST_GROUPS_BROKEN="performance opengl auto_schedule"
TEST_GROUPS_BROKEN="performance auto_schedule"
if [[ ${{matrix.target_bits}} == 32 ]]; then
# TODO: Skip testing apps on 32-bit systems for now;
Expand Down Expand Up @@ -487,9 +474,8 @@ jobs:
TEST_GROUPS_SERIAL="tutorial"
# performance is never going to be reliable on VMs.
# opengl won't work on the buildbots.
# auto_schedule is just flaky.
TEST_GROUPS_BROKEN="performance|opengl|auto_schedule"
TEST_GROUPS_BROKEN="performance|auto_schedule"
export TEST_TMPDIR="${HALIDE_TEMP_DIR}"
cd ${HALIDE_BUILD_DIR}
Expand Down
30 changes: 0 additions & 30 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,6 @@ SOURCE_FILES = \
ImageParam.cpp \
InferArguments.cpp \
InjectHostDevBufferCopies.cpp \
InjectOpenGLIntrinsics.cpp \
Inline.cpp \
InlineReductions.cpp \
IntegerDivisionTable.cpp \
Expand Down Expand Up @@ -560,7 +559,6 @@ SOURCE_FILES = \
UnsafePromises.cpp \
Util.cpp \
Var.cpp \
VaryingAttributes.cpp \
VectorizeLoops.cpp \
WasmExecutor.cpp \
WrapCalls.cpp
Expand Down Expand Up @@ -645,7 +643,6 @@ HEADER_FILES = \
ImageParam.h \
InferArguments.h \
InjectHostDevBufferCopies.h \
InjectOpenGLIntrinsics.h \
Inline.h \
InlineReductions.h \
IntegerDivisionTable.h \
Expand Down Expand Up @@ -728,7 +725,6 @@ HEADER_FILES = \
UnsafePromises.h \
Util.h \
Var.h \
VaryingAttributes.h \
VectorizeLoops.h \
WrapCalls.h

Expand Down Expand Up @@ -779,7 +775,6 @@ RUNTIME_CPP_COMPONENTS = \
msan \
msan_stubs \
opencl \
opengl \
openglcompute \
opengl_egl_context \
opengl_glx_context \
Expand Down Expand Up @@ -851,7 +846,6 @@ RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \
$(INCLUDE_DIR)/HalideRuntimeHexagonDma.h \
$(INCLUDE_DIR)/HalideRuntimeHexagonHost.h \
$(INCLUDE_DIR)/HalideRuntimeOpenCL.h \
$(INCLUDE_DIR)/HalideRuntimeOpenGL.h \
$(INCLUDE_DIR)/HalideRuntimeOpenGLCompute.h \
$(INCLUDE_DIR)/HalideRuntimeMetal.h \
$(INCLUDE_DIR)/HalideRuntimeQurt.h \
Expand Down Expand Up @@ -1110,22 +1104,18 @@ CORRECTNESS_TESTS = $(shell ls $(ROOT_DIR)/test/correctness/*.cpp) $(shell ls $(
PERFORMANCE_TESTS = $(shell ls $(ROOT_DIR)/test/performance/*.cpp)
ERROR_TESTS = $(shell ls $(ROOT_DIR)/test/error/*.cpp)
WARNING_TESTS = $(shell ls $(ROOT_DIR)/test/warning/*.cpp)
OPENGL_TESTS := $(shell ls $(ROOT_DIR)/test/opengl/*.cpp)
GENERATOR_EXTERNAL_TESTS := $(shell ls $(ROOT_DIR)/test/generator/*test.cpp)
GENERATOR_EXTERNAL_TEST_GENERATOR := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp)
TUTORIALS = $(filter-out %_generate.cpp, $(shell ls $(ROOT_DIR)/tutorial/*.cpp))
AUTO_SCHEDULE_TESTS = $(shell ls $(ROOT_DIR)/test/auto_schedule/*.cpp)

-include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d)

test_correctness: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=quiet_correctness_%) $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.c=quiet_correctness_%)
test_performance: $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=performance_%)
test_error: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%)
test_warning: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%)
test_tutorial: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%)
test_valgrind: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=valgrind_%)
test_avx512: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=avx512_%)
test_opengl: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=opengl_%)
test_auto_schedule: test_mullapudi2016 test_li2018 test_adams2019

.PHONY: test_correctness_multi_gpu
Expand Down Expand Up @@ -1230,7 +1220,6 @@ ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning
# For generator tests they time the compile time only. The times are recorded in CSV files.
time_compilation_correctness: init_time_compilation_correctness $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=time_compilation_test_%)
time_compilation_performance: init_time_compilation_performance $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=time_compilation_performance_%)
time_compilation_opengl: init_time_compilation_opengl $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=time_compilation_opengl_%)
time_compilation_generator: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%)

init_time_compilation_%:
Expand All @@ -1250,14 +1239,6 @@ build_tests: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=$(BIN_DIR)/c
$(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=$(BIN_DIR)/generator_jit_%) \
$(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=$(BIN_DIR)/auto_schedule_%)

# OpenGL doesn't build on every host platform we support (eg. ARM).
.PHONY: build_opengl_tests
build_opengl_tests: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BIN_DIR)/opengl_%)

ifneq ($(WITH_OPENGL),)
build_tests: build_opengl_tests
endif

clean_generator:
rm -rf $(BIN_DIR)/*.generator
rm -rf $(BIN_DIR)/*/runtime.a
Expand Down Expand Up @@ -1321,9 +1302,6 @@ $(BIN_DIR)/error_%: $(ROOT_DIR)/test/error/%.cpp $(BIN_DIR)/libHalide.$(SHARED_E
$(BIN_DIR)/warning_%: $(ROOT_DIR)/test/warning/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

$(BIN_DIR)/opengl_%: $(ROOT_DIR)/test/opengl/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h $(INCLUDE_DIR)/HalideRuntimeOpenGL.h
$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -I$(SRC_DIR) $(TEST_LD_FLAGS) $(OPENGL_LD_FLAGS) -o $@ -MMD -MF $(BUILD_DIR)/test_opengl_$*.d

# Auto schedule tests that link against libHalide
$(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@
Expand Down Expand Up @@ -1874,11 +1852,6 @@ warning_%: $(BIN_DIR)/warning_%
cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1 | egrep --q "^Warning"
@-echo

opengl_%: $(BIN_DIR)/opengl_%
@-mkdir -p $(TMP_DIR)
cd $(TMP_DIR) ; $(CURDIR)/$< 2>&1
@-echo

generator_jit_%: $(BIN_DIR)/generator_jit_%
@-mkdir -p $(TMP_DIR)
cd $(TMP_DIR) ; $(CURDIR)/$<
Expand Down Expand Up @@ -1928,9 +1901,6 @@ time_compilation_test_%: $(BIN_DIR)/test_%
time_compilation_performance_%: $(BIN_DIR)/performance_%
$(TIME_COMPILATION) compile_times_performance.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_performance_%=performance_%)

time_compilation_opengl_%: $(BIN_DIR)/opengl_%
$(TIME_COMPILATION) compile_times_opengl.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_opengl_%=opengl_%)

time_compilation_generator_%: $(BIN_DIR)/%.generator
$(TIME_COMPILATION) compile_times_generator.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_generator_%=$(FILTERS_DIR)/%.a)

Expand Down
137 changes: 2 additions & 135 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ currently targets:

- CPU architectures: X86, ARM, MIPS, Hexagon, PowerPC
- Operating systems: Linux, Windows, Mac OS X, Android, iOS, Qualcomm QuRT
- GPU Compute APIs: CUDA, OpenCL, OpenGL, OpenGL Compute Shaders, Apple Metal,
- GPU Compute APIs: CUDA, OpenCL, OpenGL Compute Shaders, Apple Metal,
Microsoft Direct X 12

Rather than being a standalone programming language, Halide is embedded in C++.
Expand Down Expand Up @@ -336,140 +336,7 @@ an older XCode which does not default to libc++.

# Halide OpenGL/GLSL backend

Halide's OpenGL backend offloads image processing operations to the GPU by
generating GLSL-based fragment shaders.

Compared to other GPU-based processing options such as CUDA and OpenCL, OpenGL
has two main advantages: it is available on basically every desktop computer and
mobile device, and it is generally well supported across different hardware
vendors.

The main disadvantage of OpenGL as an image processing framework is that the
computational capabilities of fragment shaders are quite restricted. In general,
the processing model provided by OpenGL is most suitable for filters where each
output pixel can be expressed as a simple function of the input pixels. This
covers a wide range of interesting operations like point-wise filters and
convolutions; but a few common image processing operations such as histograms or
recursive filters are notoriously hard to express in GLSL.

#### Writing OpenGL-Based Filters

To enable code generation for OpenGL, include `opengl` in the target specifier
passed to Halide. Since OpenGL shaders are limited in their computational power,
you must also specify a CPU target for those parts of the filter that cannot or
should not be computed on the GPU. Examples of valid target specifiers are

```
host-opengl
x86-opengl-debug
```

Adding `debug`, as in the second example, adds additional logging output and is
highly recommended during development.

By default, filters compiled for OpenGL targets run completely on the CPU.
Execution on the GPU must be enabled for individual Funcs by appropriate
scheduling calls.

GLSL fragment shaders implicitly iterate over two spatial dimensions x,y and the
color channel. Due to the way color channels are handled in GLSL, only filters for
which the color index is a compile-time constant can be scheduled. The main
consequence is that the range of color variables must be explicitly specified
for both input and output buffers before scheduling:

```
ImageParam input;
Func f;
Var x, y, c;
f(x, y, c) = ...;
input.set_bounds(2, 0, 3); // specify color range for input
f.bound(c, 0, 3); // and output
f.glsl(x, y, c);
```

#### JIT Compilation

For JIT compilation Halide attempts to load the system libraries for opengl and
creates a new context to use for each module. Windows is not yet supported.

Examples for JIT execution of OpenGL-based filters can be found in test/opengl.

#### AOT Compilation

When AOT (ahead-of-time) compilation is used, Halide generates OpenGL-enabled
object files that can be linked to and called from a host application. In
general, this is fairly straightforward, but a few things must be taken care of.

On Linux, OS X, and Android, Halide creates its own OpenGL context unless the
current thread already has an active context. On other platforms you have to
link implementations of the following two functions with your Halide code:

```
extern "C" int halide_opengl_create_context(void *) {
return 0; // if successful
}
extern "C" void *halide_opengl_get_proc_addr(void *, const char *name) {
...
}
```

Halide allocates and deletes textures as necessary. Applications may manage the
textures by hand by setting the `halide_buffer_t::device` field; this is most
useful for reusing image data that is already stored in textures. Some
rudimentary checks are performed to ensure that externally allocated textures
have the correct format, but in general that's the responsibility of the
application.

It is possible to let Halide render directly to the current framebuffer; to do this,
set the `dev` field of the output buffer to the value returned by
`halide_opengl_output_client_bound`. The example in apps/HelloAndroidGL
demonstrates this technique.

Some operating systems can delete the OpenGL context of suspended applications.
If this happens, Halide needs to re-initialize itself with the new context after
the application resumes. Call `halide_opengl_context_lost` to reset Halide's
OpenGL state after this has happened.

#### Limitations

The current implementation of the OpenGL backend targets the common subset of
OpenGL 2.0 and OpenGL ES 2.0 which is widely available on both mobile devices
and traditional computers. As a consequence, only a subset of the Halide
language can be scheduled to run using OpenGL. Some important limitations are:

- Reductions cannot be implemented in GLSL and must be run on the CPU.

- OpenGL ES 2.0 only supports uint8 buffers.

Support for floating point textures is available, but requires OpenGL (ES) 3.0
or the texture_float extension, which may not work on all mobile devices.

- OpenGL ES 2.0 has very limited support for integer arithmetic. For maximum
compatibility, consider doing all computations using floating point, even when
using integer textures.

- Only 2D images with 3 or 4 color channels can be scheduled. Images with one or
two channels require OpenGL (ES) 3.0 or the texture_rg extension.

- Not all builtin functions provided by Halide are currently supported, for
example `fast_log`, `fast_exp`, `fast_pow`, `reinterpret`, bit operations,
`random_float`, `random_int` cannot be used in GLSL code.

The maximum texture size in OpenGL is `GL_MAX_TEXTURE_SIZE`, which is often
smaller than the image of interest; on mobile devices, for example,
`GL_MAX_TEXTURE_SIZE` is commonly 2048. Tiling must be used to process larger
images.

Planned features:

- Support for half-float textures and arithmetic

- Support for integer textures and arithmetic

(Note that OpenGL Compute Shaders are supported with a separate OpenGLCompute
backend.)
TODO: update this for OpenGLCompute, which is staying

# Halide for Hexagon HVX

Expand Down
4 changes: 2 additions & 2 deletions README_cmake.md
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,6 @@ apply when `WITH_TESTS=ON`:
| `WITH_TEST_ERROR` | `ON` | enable the expected-error tests |
| `WITH_TEST_WARNING` | `ON` | enable the expected-warning tests |
| `WITH_TEST_PERFORMANCE` | `ON` | enable performance testing |
| `WITH_TEST_OPENGL` | `OFF` | enable the OpenGL tests |
| `WITH_TEST_GENERATOR` | `ON` | enable the AOT generator tests |

The following options enable/disable various LLVM backends (they correspond to
Expand All @@ -416,7 +415,6 @@ The following options enable/disable various Halide-specific backends:
| Option | Default | Description |
| --------------------- | ------- | -------------------------------------- |
| `TARGET_OPENCL` | `ON` | Enable the OpenCL-C backend |
| `TARGET_OPENGL` | `ON` | Enable the OpenGL/GLSL backend |
| `TARGET_METAL` | `ON` | Enable the Metal backend |
| `TARGET_D3D12COMPUTE` | `ON` | Enable the Direct3D 12 Compute backend |

Expand Down Expand Up @@ -466,6 +464,8 @@ If the CMake version is lower than 3.18, the deprecated [`FindCUDA`][findcuda]
module will be used instead. It reads the variable `CUDA_TOOLKIT_ROOT_DIR`
instead of `CUDAToolkit_ROOT` above.

TODO: update this section for OpenGLCompute, which needs some (but maybe not all) of this.

When targeting OpenGL, the [`FindOpenGL`][findopengl] and [`FindX11`][findx11]
modules will be used to link AOT generated binaries. These modules can be
overridden by setting the following variables:
Expand Down
3 changes: 0 additions & 3 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ enable_testing()

# add_subdirectory(HelloAndroid) # TODO(#5374): missing CMake build
# add_subdirectory(HelloAndroidCamera2) # TODO(#5374): missing CMake build
# add_subdirectory(HelloAndroidGL) # TODO(#5374): missing CMake build
# add_subdirectory(HelloMatlab) # TODO(#5374): missing CMake build
# add_subdirectory(HelloPyTorch) # TODO(#5374): missing CMake build
# add_subdirectory(HelloWasm) # TODO(#5374): missing CMake build
Expand All @@ -24,7 +23,6 @@ add_subdirectory(conv_layer)
add_subdirectory(cuda_mat_mul)
add_subdirectory(depthwise_separable_conv)
add_subdirectory(fft)
add_subdirectory(glsl)
add_subdirectory(harris)
# add_subdirectory(hexagon_benchmarks) # TODO(#5374): missing CMake build
# add_subdirectory(hexagon_dma) # TODO(#5374): missing CMake build
Expand All @@ -39,7 +37,6 @@ add_subdirectory(max_filter)
add_subdirectory(nl_means)
# add_subdirectory(nn_ops) # TODO(#5374): missing CMake build
# add_subdirectory(onnx) # TODO(#5374): missing CMake build
# add_subdirectory(opengl_demo) # TODO(#5374): missing CMake build
# add_subdirectory(openglcompute) # TODO(#5374): missing CMake build
add_subdirectory(resize)
# add_subdirectory(resnet_50) # TODO(#5374): missing CMake build
Expand Down
Loading

0 comments on commit a4fbbe8

Please sign in to comment.