checkpoint the fp32 CUDA implementation to separate file. our mainline iteration will now continue in the new (mixed precision) file
karpathy · Apr 23, 2024 · 095d276 · 095d276
1 parent 6b6ad35
commit 095d276
Show file tree

Hide file tree

Showing 2 changed files with 2,102 additions and 4 deletions.
diff --git a/Makefile b/Makefile
@@ -66,7 +66,7 @@ else
 endif
 
 # PHONY means these targets will always be executed
-.PHONY: all train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu
+.PHONY: all train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu train_gpt2fp32cu
 
 # Add targets
 TARGETS = train_gpt2 test_gpt2
@@ -87,16 +87,17 @@ train_gpt2: train_gpt2.c
 test_gpt2: test_gpt2.c
 	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) -o $@
 
-# possibly may want to disable warnings? e.g. append -Xcompiler -Wno-unused-result
 train_gpt2cu: train_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
 
+train_gpt2fp32cu: train_gpt2_fp32.cu
+	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
+
 test_gpt2cu: test_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
 
 profile_gpt2cu: profile_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) -lineinfo $< $(NVCC_LDFLAGS) -o $@
 
 clean:
-	rm -f train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu
-
+	rm -f train_gpt2 test_gpt2 train_gpt2cu train_gpt2fp32cu test_gpt2cu