Skip to content

Commit

Permalink
checkpoint the fp32 CUDA implementation to separate file. our mainline iteration will now continue in the new (mixed precision) file
Browse files Browse the repository at this point in the history
  • Loading branch information
karpathy committed Apr 23, 2024
1 parent 6b6ad35 commit 095d276
Show file tree
Hide file tree
Showing 2 changed files with 2,102 additions and 4 deletions.
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ else
endif

# PHONY targets are always considered out of date, so their recipes run on
# every invocation even when a file with the same name already exists.
# `clean` is listed too so that a stray file named "clean" cannot disable it.
.PHONY: all train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu train_gpt2fp32cu clean

# Add targets
TARGETS = train_gpt2 test_gpt2
Expand All @@ -87,16 +87,17 @@ train_gpt2: train_gpt2.c
# Build the test_gpt2 binary from test_gpt2.c.
# $< expands to the first prerequisite (the .c file), $@ to the target name.
# The recipe line must begin with a hard TAB or make reports "missing separator".
test_gpt2: test_gpt2.c
	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) -o $@

# Build train_gpt2cu from train_gpt2.cu with $(NVCC); per the commit message this
# is the file where mainline (mixed precision) work continues.
# possibly may want to disable warnings? e.g. append -Xcompiler -Wno-unused-result
# The recipe line must begin with a hard TAB.
train_gpt2cu: train_gpt2.cu
	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@

# Build train_gpt2fp32cu from train_gpt2_fp32.cu, the checkpointed fp32 CUDA
# implementation that this commit split into its own file.
# Uses the same $(NVCC) flags and link flags as train_gpt2cu.
train_gpt2fp32cu: train_gpt2_fp32.cu
	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@

# Build the test_gpt2cu binary from test_gpt2.cu with $(NVCC).
# $< is the .cu source, $@ the output binary name.
test_gpt2cu: test_gpt2.cu
	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@

# Build the profile_gpt2cu binary from profile_gpt2.cu.
# Same as the other CUDA rules but with -lineinfo added, which asks nvcc to
# emit source line information for device code (useful when profiling).
profile_gpt2cu: profile_gpt2.cu
	$(NVCC) $(NVCC_FLAGS) -lineinfo $< $(NVCC_LDFLAGS) -o $@

# Remove the binaries produced by the build rules in this Makefile.
# rm -f silently ignores binaries that were never built.
# profile_gpt2cu is included so the profiling binary does not survive a clean.
clean:
	rm -f train_gpt2 test_gpt2 train_gpt2cu train_gpt2fp32cu test_gpt2cu profile_gpt2cu
Loading

0 comments on commit 095d276

Please sign in to comment.