Merge branch 'sycl' into inlineasm-litlocal

ayylol · Jan 22, 2025 · 745d9b5 · 745d9b5
2 parents e91210e + 34ef866
commit 745d9b5
Show file tree

Hide file tree

Showing 48 changed files with 1,085 additions and 478 deletions.
diff --git a/.ci/metrics/requirements.lock.txt b/.ci/metrics/requirements.lock.txt
@@ -231,7 +231,7 @@ pygithub==2.5.0 \
     --hash=sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2 \
     --hash=sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf
     # via -r ./requirements.txt
-pyjwt[crypto]==2.10.0 \
+pyjwt[crypto]==2.10.1 \
     --hash=sha256:543b77207db656de204372350926bed5a86201c4cbff159f623f79c7bb487a15 \
     --hash=sha256:7628a7eb7938959ac1b26e819a1df0fd3259505627b575e4bad6d08f76db695c
     # via pygithub

diff --git a/.github/workflows/sycl-containers.yaml b/.github/workflows/sycl-containers.yaml
@@ -51,10 +51,6 @@ jobs:
             file: ubuntu2404_build
             tag: latest
             build_args: ""
-          - name: Build Ubuntu 24.04 oneAPI Docker image
-            file: ubuntu2404_build_oneapi
-            tag: latest
-            build_args: ""
           - name: Intel Drivers Ubuntu 22.04 Docker image
             file: ubuntu2204_intel_drivers
             tag: latest

diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml
@@ -152,19 +152,21 @@ jobs:
         ref: ${{ inputs.build_ref || github.sha }}
         merge_ref: ${{ inputs.merge_ref }}
         cache_path: "/__w/repo_cache/"
+    - name: Setup oneAPI env
+      if: ${{ inputs.cc == 'icx' || inputs.cxx == 'icpx' }}
+      uses: ./devops/actions/setup_linux_oneapi_env
     - name: Configure
       env:
         CC: ${{ inputs.cc }}
         CXX: ${{ inputs.cxx }}
-        ARGS: ${{ inputs.build_configure_extra_args }}
         CUDA_LIB_PATH: "/usr/local/cuda/lib64/stubs"
       run: |
         mkdir -p $CCACHE_DIR
         mkdir -p $GITHUB_WORKSPACE/build
         cd $GITHUB_WORKSPACE/build
         python3 $GITHUB_WORKSPACE/src/buildbot/configure.py -w $GITHUB_WORKSPACE \
           -s $GITHUB_WORKSPACE/src -o $GITHUB_WORKSPACE/build -t Release \
-          --ci-defaults $ARGS \
+          --ci-defaults ${{ inputs.build_configure_extra_args }} \
           --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache \
           --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
           --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \

diff --git a/.github/workflows/sycl-linux-matrix-e2e-on-nightly.yml b/.github/workflows/sycl-linux-matrix-e2e-on-nightly.yml
diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
@@ -127,12 +127,9 @@ on:
           - '["cts-cpu"]'
           - '["Linux", "build"]'
       image:
-        description: |
-          Use option ending with ":build" for AMDGPU, ":latest" for the rest.
         type: choice
         options:
-          - 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:latest'
-          - 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:build'
+          - 'ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest'
       image_options:
         description: |
           Use option with "--device=/dev/kfd" for AMDGPU, without it for the rest.

diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml
@@ -37,6 +37,20 @@ jobs:
 
       artifact_archive_name: sycl_linux_shared.tar.zst
 
+  ubuntu2404_oneapi_build:
+    if: github.repository == 'intel/llvm'
+    uses: ./.github/workflows/sycl-linux-build.yml
+    secrets: inherit
+    with:
+      build_cache_root: "/__w/"
+      build_cache_suffix: oneapi
+      build_artifact_suffix: oneapi
+      build_configure_extra_args: --cmake-opt=-DCMAKE_C_FLAGS="-no-intel-lib -ffp-model=precise" --cmake-opt=-DCMAKE_CXX_FLAGS="-no-intel-lib -ffp-model=precise"
+      cc: icx
+      cxx: icpx
+
+      artifact_archive_name: sycl_linux_oneapi.tar.zst
+
   ubuntu2204_test:
     needs: [ubuntu2204_build]
     if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }}
@@ -103,6 +117,23 @@ jobs:
       sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }}
       sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }}
 
+  ubuntu2404_oneapi_test:
+    needs: [ubuntu2404_oneapi_build]
+    if: ${{ always() && !cancelled() && needs.ubuntu2404_oneapi_build.outputs.build_conclusion == 'success' }}
+    uses: ./.github/workflows/sycl-linux-run-tests.yml
+    with:
+      name: Intel PVC L0 oneAPI
+      runner: '["Linux", "pvc"]'
+      target_devices: level_zero:gpu
+      extra_lit_opts: -j 50
+      image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:latest
+      image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
+      ref: ${{ github.sha }}
+      merge_ref: ''
+      sycl_toolchain_artifact: sycl_linux_oneapi
+      sycl_toolchain_archive: ${{ needs.ubuntu2404_oneapi_build.outputs.artifact_archive_name }}
+      sycl_toolchain_decompress_command: ${{ needs.ubuntu2404_oneapi_build.outputs.artifact_decompress_command }}
+
   build-win:
     uses: ./.github/workflows/sycl-windows-build.yml
     if: github.repository == 'intel/llvm'

diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml
@@ -149,7 +149,7 @@ jobs:
       shell: bash
       run: |
          if [[ ${{inputs.compiler}} == 'icx' ]]; then
-           export LIT_FILTER_OUT="host_tanpi_double_accuracy"
+           export LIT_XFAIL="regression\host_tanpi_double_accuracy.cpp"
          fi
          cmake --build build --target check-sycl
     - name: check-sycl-unittests

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
@@ -2147,6 +2147,22 @@ def SYCLAddIRAnnotationsMember : InheritableAttr {
   let Documentation = [SYCLAddIRAnnotationsMemberDocs];
 }
 
+def SYCLRegisteredKernels : InheritableAttr {
+  let Spellings = [CXX11<"__sycl_detail__", "__registered_kernels__">];
+  let Args = [VariadicExprArgument<"Args">];
+  let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
+  let Subjects = SubjectList<[Empty], ErrorDiag, "Translation Unit Scope">;
+  let AdditionalMembers = SYCLAddIRAttrCommonMembers.MemberCode;
+  let Documentation = [SYCLAddIRAnnotationsMemberDocs];
+}
+
+def SYCLRegisteredKernelName : InheritableAttr {
+  let Spellings = [];
+  let Subjects = SubjectList<[Function]>;
+  let Args = [StringArgument<"RegName">];
+  let Documentation = [InternalOnly];
+}
+
 def C11NoReturn : InheritableAttr {
   let Spellings = [CustomKeyword<"_Noreturn">];
   let Subjects = SubjectList<[Function], ErrorDiag>;

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12546,6 +12546,20 @@ def err_sycl_special_type_num_init_method : Error<
 def warn_launch_bounds_is_cuda_specific : Warning<
   "%0 attribute ignored, only applicable when targeting Nvidia devices">,
   InGroup<IgnoredAttributes>;
+def err_registered_kernels_num_of_args : Error<
+  "'__registered_kernels__' attribute must have at least one argument">;
+def err_registered_kernels_init_list : Error<
+  "argument to the '__registered_kernels__' attribute must be an "
+  "initializer list expression">;
+def err_registered_kernels_init_list_pair_values : Error<
+  "each initializer list argument to the '__registered_kernels__' attribute "
+  "must contain a pair of values">;
+def err_registered_kernels_resolve_function : Error<
+  "unable to resolve free function kernel '%0'">;
+def err_registered_kernels_name_already_registered : Error<
+  "free function kernel has already been registered with '%0'; cannot register with '%1'">;
+def err_not_sycl_free_function : Error<
+  "attempting to register a function that is not a SYCL free function as '%0'">;
 
 def warn_cuda_maxclusterrank_sm_90 : Warning<
   "'maxclusterrank' requires sm_90 or higher, CUDA arch provided: %0, ignoring "

diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h
@@ -252,8 +252,9 @@ class SemaSYCL : public SemaBase {
   // We need to store the list of the sycl_kernel functions and their associated
   // generated OpenCL Kernels so we can go back and re-name these after the
   // fact.
-  llvm::SmallVector<std::pair<const FunctionDecl *, FunctionDecl *>>
-      SyclKernelsToOpenCLKernels;
+  using KernelFDPairs =
+      llvm::SmallVector<std::pair<const FunctionDecl *, FunctionDecl *>>;
+  KernelFDPairs SyclKernelsToOpenCLKernels;
 
   // Used to suppress diagnostics during kernel construction, since these were
   // already emitted earlier. Diagnosing during Kernel emissions also skips the
@@ -296,11 +297,15 @@ class SemaSYCL : public SemaBase {
                               llvm::DenseSet<QualType> Visited,
                               ValueDecl *DeclToCheck);
 
+  const KernelFDPairs &getKernelFDPairs() { return SyclKernelsToOpenCLKernels; }
+
   void addSyclOpenCLKernel(const FunctionDecl *SyclKernel,
                            FunctionDecl *OpenCLKernel) {
     SyclKernelsToOpenCLKernels.emplace_back(SyclKernel, OpenCLKernel);
   }
 
+  void constructFreeFunctionKernel(FunctionDecl *FD, StringRef NameStr = "");
+
   void addSyclDeviceDecl(Decl *d) { SyclDeviceDecls.insert(d); }
   llvm::SetVector<Decl *> &syclDeviceDecls() { return SyclDeviceDecls; }
 
@@ -480,6 +485,7 @@ class SemaSYCL : public SemaBase {
   void handleSYCLIntelMaxWorkGroupsPerMultiprocessor(Decl *D,
                                                      const ParsedAttr &AL);
   void handleSYCLScopeAttr(Decl *D, const ParsedAttr &AL);
+  void handleSYCLRegisteredKernels(Decl *D, const ParsedAttr &AL);
 
   void checkSYCLAddIRAttributesFunctionAttrConflicts(Decl *D);
 
@@ -655,6 +661,10 @@ class SemaSYCL : public SemaBase {
   void addIntelReqdSubGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI,
                                     Expr *E);
   void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL);
+
+  // Used to check whether the function represented by FD is a SYCL
+  // free function kernel or not.
+  bool isFreeFunction(const FunctionDecl *FD);
 };
 
 } // namespace clang

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -641,6 +641,12 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
 
   llvm::LLVMContext &Context = getLLVMContext();
 
+  // Device compilation only: remember (registered name, function name).
+  if (getLangOpts().SYCLIsDevice)
+    if (const auto *RegAttr = FD->getAttr<SYCLRegisteredKernelNameAttr>())
+      CGM.SYCLAddRegKernelNamePairs(RegAttr->getRegName(),
+                                    FD->getNameAsString());
+
   if (FD->hasAttr<OpenCLKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>())
     CGM.GenKernelArgMetadata(Fn, FD, this);
 

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1431,6 +1431,19 @@ void CodeGenModule::Release() {
         AspectEnumValsMD->addOperand(
             getAspectEnumValueMD(Context, TheModule.getContext(), ECD));
     }
+
+    // One MDTuple per queued pair, all wrapped in a single metadata operand.
+    if (!SYCLRegKernelNames.empty()) {
+      llvm::LLVMContext &Ctx = TheModule.getContext();
+      std::vector<llvm::Metadata *> Nodes;
+      for (const MetadataPair &Names : SYCLRegKernelNames) {
+        llvm::Metadata *Vals[] = {Names.first, Names.second};
+        Nodes.push_back(llvm::MDTuple::get(Ctx, Vals));
+      }
+
+      TheModule.getOrInsertNamedMetadata("sycl_registered_kernels")
+          ->addOperand(llvm::MDNode::get(Ctx, Nodes));
+    }
   }
 
   // HLSL related end of code gen work items.

diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
@@ -456,6 +456,9 @@ class CodeGenModule : public CodeGenTypeCache {
   /// handled differently than regular annotations so they cannot share map.
   llvm::DenseMap<unsigned, llvm::Constant *> SYCLAnnotationArgs;
 
+  using MetadataPair = std::pair<llvm::Metadata *, llvm::Metadata *>;
+  SmallVector<MetadataPair, 4> SYCLRegKernelNames; // (reg name, function name)
+
   llvm::StringMap<llvm::GlobalVariable *> CFConstantStringMap;
 
   llvm::DenseMap<llvm::Constant *, llvm::GlobalVariable *> ConstantStringMap;
@@ -1483,6 +1486,12 @@ class CodeGenModule : public CodeGenTypeCache {
   llvm::Constant *EmitSYCLAnnotationArgs(
       SmallVectorImpl<std::pair<std::string, std::string>> &Pairs);
 
+  void SYCLAddRegKernelNamePairs(StringRef First, StringRef Second) {
+    llvm::LLVMContext &Ctx = getLLVMContext(); // shared by both MDStrings
+    SYCLRegKernelNames.emplace_back(llvm::MDString::get(Ctx, First),
+                                    llvm::MDString::get(Ctx, Second));
+  }
+
   /// Add attributes from add_ir_attributes_global_variable on TND to GV.
   void AddGlobalSYCLIRAttributes(llvm::GlobalVariable *GV,
                                  const RecordDecl *RD);

diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -7479,6 +7479,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_SYCLAddIRAnnotationsMember:
     S.SYCL().handleSYCLAddIRAnnotationsMemberAttr(D, AL);
     break;
+  case ParsedAttr::AT_SYCLRegisteredKernels:
+    S.SYCL().handleSYCLRegisteredKernels(D, AL);
+    break;
 
   // Swift attributes.
   case ParsedAttr::AT_SwiftAsyncName: