Fix up PTX padding so its size really is a multiple of 8 (and pad with nulls instead of spaces)

hfinkel · Dec 29, 2019 · 39b5f39 · 39b5f39
1 parent a5170e6
commit 39b5f39
Showing 1 changed file with 4 additions and 2 deletions.
diff --git a/clang/lib/CodeGen/JIT.cpp b/clang/lib/CodeGen/JIT.cpp
@@ -1411,9 +1411,11 @@ struct CompilerData {
       // The outer header of the fat binary is documented in the CUDA
       // fatbinary.h header. As mentioned there, the overall size must be a
       // multiple of eight, and so we must make sure that the PTX is.
-      while (DevCD->DevAsm.size() % 7)
-        DevCD->DevAsm += ' ';
+      // We also need to make sure that the buffer is explicitly null
+      // terminated (cuobjdump, at least, seems to assume that it is).
       DevCD->DevAsm += '\0';
+      while (DevCD->DevAsm.size() % 8)
+        DevCD->DevAsm += '\0';
 
       // NVIDIA, unfortunately, does not provide full documentation on their
       // fatbin format. There is some information on the outer header block in