Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cuda : refactor into multiple files #6269

Merged
merged 5 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Checks: >
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-uppercase-literal-suffix,
-readability-simplify-boolean-expr,
clang-analyzer-*,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
performance-*,
Expand Down
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,9 @@ if (LLAMA_CUBLAS)
enable_language(CUDA)

set(GGML_HEADERS_CUDA ggml-cuda.h)
set(GGML_SOURCES_CUDA ggml-cuda.cu)

file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")

add_compile_definitions(GGML_USE_CUBLAS)
if (LLAMA_CUDA_FORCE_DMMV)
Expand Down Expand Up @@ -519,7 +521,9 @@ if (LLAMA_HIPBLAS)
message(STATUS "HIP and hipBLAS found")

set(GGML_HEADERS_ROCM ggml-cuda.h)
set(GGML_SOURCES_ROCM ggml-cuda.cu)

file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")

add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)

Expand All @@ -543,7 +547,7 @@ if (LLAMA_HIPBLAS)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})

set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)

if (LLAMA_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
Expand Down
23 changes: 20 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ ifdef LLAMA_CUBLAS
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
OBJS += ggml-cuda.o
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
MK_NVCCFLAGS += -use_fast_math
ifdef LLAMA_FATAL_WARNINGS
MK_NVCCFLAGS += -Werror all-warnings
Expand Down Expand Up @@ -458,12 +459,23 @@ endif # LLAMA_CUDA_NO_PEER_COPY
ifdef LLAMA_CUDA_CCBIN
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-common.h

ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
endif # JETSON_EOL_MODULE_DETECT

ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
$(NVCC_COMPILE)

ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
$(NVCC_COMPILE)

endif # LLAMA_CUBLAS

ifdef LLAMA_CLBLAST
Expand Down Expand Up @@ -510,7 +522,6 @@ ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
endif # LLAMA_VULKAN

ifdef LLAMA_HIPBLAS

ifeq ($(wildcard /opt/rocm),)
ROCM_PATH ?= /usr
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
Expand Down Expand Up @@ -539,8 +550,13 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # LLAMA_CUDA_NO_PEER_COPY
OBJS += ggml-cuda.o
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

endif # LLAMA_HIPBLAS

ifdef LLAMA_METAL
Expand Down Expand Up @@ -687,6 +703,7 @@ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)

clean:
rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
rm -vrf ggml-cuda/*.o
find examples pocs -type f -name "*.o" -delete

#
Expand Down
Loading
Loading